Spaces:

dromerosm
/

chatgpt-info-extraction

Runtime error

App Files Files Community

chatgpt-info-extraction / app.py

dromerosm

Update app.py

9c0af1d over 1 year ago

raw

history blame

5.1 kB

	import gradio as gr
	import os
	import openai
	from newspaper import Article
	import json
	import re
	from transformers import GPT2Tokenizer
	import requests


	# define the text summarizer function

	# Budget, in GPT-2 tokens, for the article text that is sent to the model.
	_MAX_PAGE_TOKENS = 3100
	# Pages with this many tokens or fewer are not sent to the model at all.
	_MIN_PAGE_TOKENS = 10
	# Seconds to wait for the article server before giving up on the fetch.
	_FETCH_TIMEOUT = 30
	# USD per 1000 tokens used for the cost estimate shown to the user.
	# NOTE(review): presumably the gpt-3.5-turbo price when this was written - verify.
	_USD_PER_1K_TOKENS = 0.002
	# GPT-2 tokenizer shared by all requests; created on first use.
	_tokenizer_cache = None


	def _get_tokenizer():
	    """Return the shared GPT-2 tokenizer, loading it only on the first call."""
	    global _tokenizer_cache
	    if _tokenizer_cache is None:
	        _tokenizer_cache = GPT2Tokenizer.from_pretrained("gpt2")
	    return _tokenizer_cache


	def _trim_to_token_budget(text, tokenizer, max_tokens):
	    """Keep the leading '.'-delimited pieces of *text* that fit in *max_tokens*.

	    Returns a ``(trimmed_text, token_count)`` tuple. The kept prefix is
	    re-joined with the original periods, so text such as "1.7b" is not
	    altered, and ``token_count`` covers only the pieces that were kept.
	    """
	    pieces = text.split('.')
	    kept = 0
	    token_count = 0
	    for piece in pieces:
	        piece_tokens = len(tokenizer.tokenize(piece))
	        if token_count + piece_tokens > max_tokens:
	            break
	        token_count += piece_tokens
	        kept += 1

	    trimmed = ".".join(pieces[:kept])
	    if 0 < kept < len(pieces):
	        # The cut happened right after a period; put that period back.
	        trimmed += "."
	    return trimmed.strip(), token_count


	def text_prompt(request, system_role, page_url, contraseña, temp):
	    """Fetch an article, trim it to the token budget and query gpt-3.5-turbo.

	    Args:
	        request: The user's prompt; the article text is appended to it.
	        system_role: Content of the "system" chat message.
	        page_url: URL of the article to download and parse.
	        contraseña: OpenAI API key; must start with "sk-".
	        temp: Sampling temperature passed to the chat completion call.

	    Returns:
	        A tuple of three strings: the full parsed article text, the
	        Markdown answer (or an error/status message), and the token usage
	        (total tokens plus a cost estimate on success, the trimmed page's
	        token count otherwise, or "" when the URL could not be processed).
	    """
	    if not page_url or not page_url.strip():
	        return "", "--- An error occurred while processing the URL: no URL was provided ---", ""

	    try:
	        headers = {'User-Agent': 'Chrome/83.0.4103.106'}
	        # A timeout keeps a stalled server from blocking a queue worker forever.
	        response = requests.get(page_url, headers=headers, timeout=_FETCH_TIMEOUT)
	        # Report HTTP errors instead of summarising an error page.
	        response.raise_for_status()

	        page = Article('')
	        page.set_html(response.text)
	        page.parse()
	    except Exception as e:
	        return "", f"--- An error occurred while processing the URL: {e} ---", ""

	    page_text, num_tokens = _trim_to_token_budget(
	        page.text, _get_tokenizer(), _MAX_PAGE_TOKENS
	    )

	    api_key = contraseña or ""
	    if num_tokens <= _MIN_PAGE_TOKENS or not api_key.startswith("sk-"):
	        return page.text, "--- Check API-Key or Min number of tokens:", str(num_tokens)

	    openai.api_key = api_key
	    # get the response from openai API
	    try:
	        completion = openai.ChatCompletion.create(
	            model="gpt-3.5-turbo",
	            messages=[
	                {"role": "system", "content": system_role},
	                {"role": "user", "content": request + "\n\n" + 'Text:\n\n"' + page_text + '\n"'},
	            ],
	            max_tokens=512,
	            temperature=temp,
	            top_p=1.0,
	        )
	        response_text = completion['choices'][0]['message']['content']
	        total_tokens = completion["usage"]["total_tokens"]

	        # Collapse every whitespace run into a single space.
	        # NOTE(review): this also flattens newlines, so Markdown lists or
	        # headings in the answer end up on one line - confirm that is intended.
	        response_text = re.sub(r'\s+', ' ', response_text)
	        response_text = f"#### [{page.title}]({page_url})\n\n{response_text.strip()}"
	        total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(
	            total_tokens / 1000 * _USD_PER_1K_TOKENS
	        )

	        return page.text, response_text, total_tokens_str
	    except Exception as e:
	        # Every branch returns the token figure as a string for the Markdown output.
	        return page.text, f"--- An error occurred while processing the request: {e} ---", str(num_tokens)

	# define the gradio interface
	# Inputs map positionally onto text_prompt(request, system_role, page_url,
	# contraseña, temp); outputs map onto the three values it returns.
	iface = gr.Interface(
	    fn=text_prompt,
	    inputs=[
	        gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
	        gr.Textbox(lines=1, placeholder="Enter your system-role description here...", label="System Role:", type="text"),
	        gr.Textbox(lines=1, placeholder="Enter the Article's URL here...", label="Article's URL to parse:", type="text"),
	        gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
	        gr.Slider(0.0, 1.0, value=0.3, label="Temperature:"),
	    ],
	    outputs=[
	        gr.Textbox(label="Input:"),
	        gr.Markdown(label="Output:"),
	        gr.Markdown(label="Total Tokens:"),
	    ],
	    # Each example row is [prompt, system role, URL, API key (left blank), temperature].
	    examples=[
	        ["Resumen el siguiente texto en un máximo de 100 palabras.", "Actuar como consultor de negocio. La respuesta deberá aparentar ser novedosa. Formatea la respuesta en Markdown. El texto deberá ser traducido siempre al español. Deberás añadir al final una lista de topics del texto en forma de lista separada por comas.", "https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/", "", 0.3],
	        ["Generate a summary of the following text. Give me an overview of the main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "Act as a Business Consultant", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html", "", 0.7],
	        ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):", "Act as a Business Consultant", "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/", "", 0.3],
	    ],
	    title="ChatGPT info extraction from URL",
	    # The limit quoted here matches the 3100-token trim applied to the article text.
	    description=(
	        "This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [gpt-3.5-turbo] engine.\n"
	        "The URL text can be referenced in the prompt as \"following text\".\n"
	        "A GPT2 tokenizer is included to ensure that the 3,100 token limit for OpenAI queries is not exceeded. "
	        "Provide a prompt with your request, the description for the system role, the url for text retrieval, your api-key and temperature to process the text."
	    ),
	)

	# error capturing in integration as a component
	# Start the queued app. A start-up failure is written to the process output
	# so it shows up in the logs instead of being swallowed.

	error_message = ""

	try:
	    # NOTE(review): `concurrency_count` is only accepted by older Gradio
	    # releases - confirm against the pinned Gradio version.
	    iface.queue(concurrency_count=20)
	    iface.launch()
	except Exception as e:
	    error_message = "An error occurred: " + str(e)
	    # Log first: the assignment below cannot reach a user when the app never started.
	    print(error_message, flush=True)
	    # NOTE(review): assumes the Interface exposes its output components as
	    # `outputs` - TODO confirm for the Gradio version in use.
	    iface.outputs[1].value = error_message