# Spaces:
# Runtime error
# Runtime error
# File size: 4,963 Bytes
# 4e003b1 5a5189b 38c31b9 4e003b1 30c813e 4e003b1 d8f1bff 4e003b1 d8f1bff 4e003b1 d8f1bff 4e003b1 30c813e 4e003b1 c791143 4e003b1 c791143 4e003b1 c791143 4e003b1 30c813e 4e003b1 30c813e 4e003b1 9b8786e d8f1bff 4e003b1 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
import os
import openai
from newspaper import Article
from newspaper import Config
import json
import re
from transformers import GPT2Tokenizer
import requests
# define the text summarizer function

# Maximum number of GPT-2 tokens of article text forwarded to the model.
_MAX_ARTICLE_TOKENS = 3100
# The API is only queried when the trimmed article has more tokens than this.
_MIN_ARTICLE_TOKENS = 10
# Holds the GPT-2 tokenizer once loaded so it is not re-created per request.
_TOKENIZER_CACHE = {}


def _get_tokenizer():
    """Return the shared GPT-2 tokenizer, loading it on first use."""
    if "gpt2" not in _TOKENIZER_CACHE:
        _TOKENIZER_CACHE["gpt2"] = GPT2Tokenizer.from_pretrained("gpt2")
    return _TOKENIZER_CACHE["gpt2"]


def _trim_to_token_limit(text, tokenizer, max_tokens):
    """Return ``(trimmed_text, token_count)`` keeping whole sentences within *max_tokens*."""
    kept = []
    token_count = 0
    # TODO: for Chinese, the separator is '。'
    for fragment in text.split('.'):
        # Skip blank fragments (e.g. the empty piece after the final period)
        # so the trimmed text does not end with a dangling " .".
        if not fragment.strip():
            continue
        fragment_tokens = len(tokenizer.tokenize(fragment))
        # Check before adding, so the reported count only covers kept text.
        if token_count + fragment_tokens > max_tokens:
            break
        kept.append(fragment + ". ")
        token_count += fragment_tokens
    return "".join(kept).strip(), token_count


def text_prompt(request, system_role, page_url, api_key, temp):
    """Download the article at *page_url* and query gpt-3.5-turbo about it.

    Args:
        request: The user prompt; the trimmed article text is appended to it.
        system_role: Content of the system message sent to the model.
        page_url: URL of the article to download and parse.
        api_key: OpenAI API key; the API is only called when it starts
            with ``"sk-"``.
        temp: Sampling temperature passed to the API.

    Returns:
        A 3-tuple ``(article_text, message, token_info)``:

        * on success, the full article text, a Markdown string with a linked
          title followed by the model response, and the total token count
          with an estimated cost;
        * when the URL cannot be processed, ``("", error_message, "")``;
        * when the key is rejected or the article is too short, or when the
          API call fails, the article text, an explanatory message and the
          number of article tokens as a string.
    """
    if not page_url or not page_url.strip():
        return "", "--- An error occurred while processing the URL: no URL provided ---", ""
    page_url = page_url.strip()

    try:
        USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
        config = Config()
        config.browser_user_agent = USER_AGENT
        config.request_timeout = 10
        article = Article(page_url, config=config)
        article.download()
        article.parse()
    except Exception as e:
        return "", f"--- An error occurred while processing the URL: {e} ---", ""

    page_text, num_tokens = _trim_to_token_limit(
        article.text, _get_tokenizer(), _MAX_ARTICLE_TOKENS
    )

    api_key = api_key or ""
    if num_tokens <= _MIN_ARTICLE_TOKENS or not api_key.startswith("sk-"):
        return article.text, "--- Check API-Key or Min number of tokens:", str(num_tokens)

    openai.api_key = api_key
    # get the response from openai API
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_role},
                {"role": "user", "content": request + "\n\n" + 'Text:\n\n"' + page_text + '\n"'},
            ],
            max_tokens=512,
            temperature=temp,
            top_p=1.0,
        )
        # get the response text
        response_text = response['choices'][0]['message']['content']
        total_tokens = response["usage"]["total_tokens"]
        # clean the response text: collapse every whitespace run to one space
        response_text = re.sub(r'\s+', ' ', response_text)
        # Heading links the parsed article title to the URL that was queried.
        response_text = f"#### [{article.title}]({page_url})\n\n{response_text.strip()}"
        # Cost estimate uses the hard-coded rate of 0.002 USD per 1,000 tokens.
        total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(total_tokens / 1000 * 0.002)
        return article.text, response_text, total_tokens_str
    except Exception as e:
        # Return the count as a string, like the other return paths do.
        return article.text, f"--- An error occurred while processing the request: {e} ---", str(num_tokens)
# define the gradio interface
# Inputs map positionally onto text_prompt(request, system_role, page_url,
# api_key, temp); the three outputs receive its returned 3-tuple in order.
iface = gr.Interface(
    fn=text_prompt,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your gpt-role description here...", label="GPT Role:", type="text"),
        gr.Textbox(lines=1, placeholder="Enter the Article's URL here...", label="Article's URL to parse:", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
        gr.Slider(0.0, 1.0, value=0.7, label="Temperature:"),
    ],
    outputs=[
        gr.Textbox(label="Output:"),
        gr.Markdown(label="Output:"),
        gr.Markdown(label="Total Tokens:"),
    ],
    # Each example row is [prompt, role, url, api-key, temperature]; the
    # API key is intentionally left blank.
    examples=[
        ["请用简体中文生成一段200字的摘要,并提取5个关键词.", "作为新闻编辑", "https://openai.com/blog/planning-for-agi-and-beyond", "", 0.7],
        ["Generate a summary of the following text. Give me an overview of the main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "Act as a Business Consultant", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html", "", 0.7],
        ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):", "Act as a Business Consultant", "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/", "", 0.3],
    ],
    title="ChatGPT info extraction from URL",
    # The token limit quoted here matches the 3100-token trim applied to the
    # article text before it is sent to the API.
    description=(
        "This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [gpt-3.5-turbo] engine.\n"
        "The URL text can be referenced in the prompt as \"following text\".\n"
        "A GPT2 tokenizer is included to ensure that the 3,100 token limit for OpenAI queries is not exceeded. "
        "Provide a prompt with your request, the description for the system role, the url for text retrieval, your api-key and temperature to process the text."
    ),
)
# error capturing in integration as a component
# Start the request queue and launch the app; a startup failure is captured
# as a message instead of propagating.
error_message = ""
try:
    iface.queue(concurrency_count=20)
    iface.launch()
except Exception as e:
    error_message = "An error occurred: " + str(e)
    # Also print the failure so it appears in the process output.
    print(error_message)
    # NOTE(review): assumes the Interface object exposes an `outputs` list
    # whose second entry is the Markdown response component — confirm against
    # the installed gradio version.
    iface.outputs[1].value = error_message