import gradio as gr
import os
import openai
from newspaper import Article
import json
import re
from transformers import GPT2Tokenizer
import requests
# define the text summarizer function
def text_prompt(request, system_role, page_urls_str, api_key, api_base, deployment_id, temp):
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    # one URL per line; drop empty lines
    page_urls = [page_url_str for page_url_str in page_urls_str.split("\n") if page_url_str]
    if len(page_urls) == 0:
        return "", "--- No URLs found ---", ""

    page_texts = []
    response_texts = []
    total_tokens = 0
    for page_url in page_urls:
        try:
            # fetch the page ourselves so we can set a User-Agent header
            headers = {'User-Agent': 'Chrome/83.0.4103.106'}
            response = requests.get(page_url, headers=headers)
            html = response.text
            page = Article('')
            page.set_html(html)
            page.parse()
        except Exception as e:
            return "", f"--- An error occurred while processing the URL: {e} ---", ""
        # split into rough sentences and accumulate text until the token budget is hit
        sentences = page.text.split('.')
        tokens = []
        page_text = ""
        for sentence in sentences:
            tokens.extend(tokenizer.tokenize(sentence))
            # trim the text to a maximum of 3100 tokens
            if len(tokens) > 3100:
                break
            page_text += sentence + ". "
        # count the prompt and system-role tokens against the same budget
        tokens.extend(tokenizer.tokenize(request))
        tokens.extend(tokenizer.tokenize(system_role))
        # strip the trailing space
        page_text = page_text.strip()
        num_tokens = len(tokens)

        tokens_condition = num_tokens > 10
        api_key_condition = len(api_key) > 6
        deployment_id_condition = len(deployment_id) > 6
        if tokens_condition and api_key_condition and deployment_id_condition:
            openai.api_type = "azure"
            openai.api_version = "2023-05-15"
            openai.api_base = api_base
            openai.api_key = api_key
            max_tokens = 4000 - num_tokens  # TODO: replace the hard-coded 4000 with a dictionary of max tokens per deployment_id
            # get the response from the OpenAI API
            try:
                response = openai.ChatCompletion.create(
                    deployment_id=deployment_id,
                    messages=[
                        {"role": "system", "content": system_role},
                        {"role": "user", "content": request + "\n\n" + 'Text:\n\n""""' + page_text + '\n""""'}
                    ],
                    max_tokens=max_tokens,
                    temperature=temp,
                    top_p=1.0,
                )
                # get the response text
                response_text = response['choices'][0]['message']['content']
                total_tokens += response["usage"]["total_tokens"]
                # clean the response text
                response_text = re.sub(r'\s+', ' ', response_text)
                response_text = f"#### [{page.title}]({page_url})\n\n{response_text.strip()}\n"
            except Exception as e:
                response_text = f"#### [{page.title}]({page_url})\n\n"
                response_text += f"--- An error occurred while processing the request: {e} ---\n"
            page_texts.append(page.text)
            response_texts.append(response_text)
        else:
            page_text_temp = "ERROR:\n\n"
            if page.text:
                page_text_temp += page.text
            response_text_temp = "#### "
            if page.title:
                response_text_temp += f"[{page.title}]({page_url})"
            if not tokens_condition:
                response_text_temp += "\n\nERROR: Token problem! The text from the URL may not have been readable. "
            if not api_key_condition:
                response_text_temp += "\n\nERROR: API Key problem! Copy and paste the API Key again (watch for spaces at the beginning or end). "
            if not deployment_id_condition:
                response_text_temp += "\n\nERROR: Deployment ID problem! Copy and paste the deployment_id again (watch for spaces at the beginning or end). "
            page_texts.append(page_text_temp)
            response_texts.append(response_text_temp)
    page_texts_str = "".join([f"====== NEW URL: {url} ======\n{page_text}\n\n" for page_text, url in zip(page_texts, page_urls)])
    response_texts_str = "\n\n".join(response_texts)
    # token count plus a cost estimate at $0.03 per 1K tokens
    total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(total_tokens / 1000 * 0.03)
    return page_texts_str, response_texts_str, total_tokens_str
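
# Usage sketch (not part of the original app): a direct call to text_prompt
# without the Gradio UI. All values below are hypothetical placeholders; a
# reachable article URL and valid Azure OpenAI credentials are required for
# this to return a real summary.
#
# page_texts, summaries, tokens_used = text_prompt(
#     request="Summarize the following text in three sentences.",
#     system_role="You are a concise news summarizer.",
#     page_urls_str="https://example.com/article",
#     api_key="<your-azure-openai-api-key>",
#     api_base="https://<your-resource>.openai.azure.com/",
#     deployment_id="<your-gpt-35-turbo-deployment>",
#     temp=0.0,
# )
# print(summaries)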
# define the gradio interface
iface = gr.Interface(
    fn=text_prompt,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your system-role description here...", label="System Role:", type="text"),
        gr.Textbox(lines=10, placeholder="Enter the articles' URLs here...", label="Articles' URLs to parse (one per line, up to 10):", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your API key here...", label="API Key:", type="password"),
        gr.Textbox(lines=1, placeholder="Enter your Azure OpenAI API base here...", label="Azure API base (Endpoint):", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your model name here...", label="Deployment ID:", type="text"),
        gr.Slider(0.0, 1.0, value=0.0, label="Temperature:")
    ],
    outputs=[gr.Textbox(label="Input:"), gr.Markdown(label="Output:"), gr.Markdown(label="Total Tokens:")],
    title="ChatGPT info extraction from URL",
    description="This tool queries the text retrieved from each URL with the newspaper3k library, using Microsoft Azure OpenAI's gpt-3.5-turbo engine.\nThe URL text can be referenced in the prompt as \"the following text\".\nA GPT-2 tokenizer is included to trim each article to roughly 3,100 tokens so the OpenAI query limit is not exceeded. Provide a prompt with your request, a description for the system role, the URLs for text retrieval, your API key and the temperature to process the text."
)
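
# Example of the multi-line "Articles' URLs" textbox input the interface above
# expects (placeholder URLs, not from the original app): one URL per line,
# blank lines are ignored.
#
# https://example.com/first-article
# https://example.com/second-article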
# capture launch errors and surface them in the output component
error_message = ""
try:
    iface.queue(concurrency_count=20)
    iface.launch(debug=True)
except Exception as e:
    error_message = "An error occurred: " + str(e)
    iface.outputs[1].value = error_message