import re

import gradio as gr
import openai
import requests
from newspaper import Article
from transformers import GPT2Tokenizer


# define the text summarizer function
def text_prompt(request, system_role, page_urls_str, api_key, api_base, deployment_id, temp):
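    # load the GPT-2 tokenizer, used to estimate token counts before calling the API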
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    page_urls = [page_url_str for page_url_str in page_urls_str.split("\n") if page_url_str]
    if len(page_urls) == 0:
        return "", "--- No URLs found ---", ""

    page_texts = []
    response_texts = []
    total_tokens = 0
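
    # process each URL independently; any download/parse failure aborts with an error message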
    for page_url in page_urls:
        try:
            headers = {'User-Agent': 'Chrome/83.0.4103.106'}
            response = requests.get(page_url, headers=headers)
            html = response.text
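
            # build a newspaper3k Article from the downloaded HTML, so the HTTP request
            # (and its User-Agent header) stays under our control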
            page = Article('')
            page.set_html(html)
            page.parse()
        except Exception as e:
            return "", f"--- An error occurred while processing the URL: {e} ---", ""
        sentences = page.text.split('.')

        tokens = []
        page_text = ""

        for sentence in sentences:
            tokens.extend(tokenizer.tokenize(sentence))

            # trim the page text to a maximum of 3100 tokens
            if len(tokens) > 3100:
                break

            page_text += sentence + ". "

        tokens.extend(tokenizer.tokenize(request))
        tokens.extend(tokenizer.tokenize(system_role))

        # strip the trailing space added after the last sentence
        page_text = page_text.strip()

        num_tokens = len(tokens)
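
        # basic sanity checks before calling the API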
        tokens_condition = num_tokens > 10
        api_key_condition = len(api_key) > 6
        deployment_id_condition = len(deployment_id) > 6

        if tokens_condition and api_key_condition and deployment_id_condition:
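            # configure the openai module for Azure OpenAI (module-level settings, pre-1.0 SDK)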
            openai.api_type = "azure"
            openai.api_version = "2023-05-15"
            openai.api_base = api_base
            openai.api_key = api_key

            max_tokens = 4000 - num_tokens  # TODO: replace 4000 with a dictionary of max tokens per deployment_id

            # request the completion from the Azure OpenAI API
            try:
                response = openai.ChatCompletion.create(
                    deployment_id=deployment_id,
                    messages=[
                        {"role": "system", "content": system_role},
                        {"role": "user", "content": request + "\n\n" + 'Text:\n\n""""' + page_text + '\n""""'}
                    ],
                    max_tokens=max_tokens,
                    temperature=temp,
                    top_p=1.0,
                )
                # extract the response text and track token usage for the cost estimate
                response_text = response['choices'][0]['message']['content']
                total_tokens += response["usage"]["total_tokens"]

                # collapse whitespace in the response and format it as markdown
                response_text = re.sub(r'\s+', ' ', response_text)
                response_text = f"#### [{page.title}]({page_url})\n\n{response_text.strip()}\n"
            except Exception as e:
                response_text = f"#### [{page.title}]({page_url})\n\n"
                response_text += f"--- An error occurred while processing the request: {e} ---\n"

            page_texts.append(page.text)
            response_texts.append(response_text)
        else:
            page_text_temp = "ERROR:\n\n"
            if page.text:
                page_text_temp += page.text

            response_text_temp = "#### "
            if page.title:
                response_text_temp += f"[{page.title}]({page_url})"

            if not tokens_condition:
                response_text_temp += "\n\nERROR: Token problems! The URL may not be readable. "
            if not api_key_condition:
                response_text_temp += "\n\nERROR: API key problems! Copy and paste the API key (watch out for spaces at the beginning or end). "
            if not deployment_id_condition:
                response_text_temp += "\n\nERROR: Deployment ID problems! Copy and paste the deployment ID (watch out for spaces at the beginning or end). "

            page_texts.append(page_text_temp)
            response_texts.append(response_text_temp)
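
    # aggregate the per-URL results into the three output fields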
    page_texts_str = "".join([f"====== NEW URL: {url} ======\n{page_text}\n\n" for page_text, url in zip(page_texts, page_urls)])
    response_texts_str = "\n\n".join(response_texts)
    total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(total_tokens / 1000 * 0.03)

    return page_texts_str, response_texts_str, total_tokens_str


# define the gradio interface
iface = gr.Interface(
    fn=text_prompt,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your system-role description here...", label="System Role:", type="text"),
        gr.Textbox(lines=10, placeholder="Enter the articles' URLs here...", label="Articles' URLs to parse (one per line, up to 10):", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your API key here...", label="API Key:", type="password"),
        gr.Textbox(lines=1, placeholder="Enter your Azure OpenAI API base here...", label="Azure API Base (Endpoint):", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your deployment name here...", label="Deployment ID:", type="text"),
        gr.Slider(0.0, 1.0, value=0.0, label="Temperature:")
    ],
    outputs=[gr.Textbox(label="Input:"), gr.Markdown(label="Output:"), gr.Markdown(label="Total Tokens:")],
    title="ChatGPT info extraction from URL",
    description="This tool queries the text retrieved from each URL (via the newspaper3k library) with Microsoft Azure OpenAI's [gpt-3.5-turbo] engine.\nThe URL text can be referenced in the prompt as the \"following text\".\nA GPT2 tokenizer trims each article to about 3,100 tokens so the request stays within the model's context limit. Provide a prompt, a system-role description, the articles' URLs, your API key, the Azure endpoint, the deployment ID, and a temperature."
)

# capture launch errors and surface them in the output component
error_message = ""
try:
    iface.queue(concurrency_count=20)
    iface.launch(debug=True)
except Exception as e:
    error_message = "An error occurred: " + str(e)
    iface.outputs[1].value = error_message