import gradio as gr
import os
import openai
from newspaper import Article
import json
import re
from transformers import GPT2Tokenizer
import requests
import time
def text_prompt(page_url, azure_endpoint, contraseña, temp):
    # Fixed prompt strings for the user request and the system role
    request = """Analyze the following press article and generate a KYC (Know Your Customer) report for risk analysis, covering the following aspects:
1. Identification of the parties involved (individuals, companies, entities)
2. Suspicious or unusual activities described in the text
Once the response has been generated, format it as HTML"""

    system_role = """Act as a risk analyst specialized in regulatory compliance and KYC (Know Your Customer).
You have a solid background in finance, law or management, and you are familiar with local and international regulations on the prevention of money laundering and terrorist financing.
You have advanced analytical and research skills that allow you to effectively assess the information provided in the news and determine the level of risk associated with the parties involved.
You also have excellent written and verbal communication skills, so you can present your conclusions clearly and concisely in a report accessible to executives and other stakeholders in the organization.
In addition, you keep up to date with trends and developments in regulatory compliance and risk management.
The response format will always be HTML."""
    start_time = time.time()

    # Download the page and parse the article text with newspaper
    try:
        headers = {'User-Agent': 'Chrome/83.0.4103.106'}
        response = requests.get(page_url, headers=headers)
        html = response.text

        page = Article('')
        page.set_html(html)
        page.parse()
    except Exception as e:
        return "", f"--- An error occurred while processing the URL: {e} ---", ""

    url_processing_time = time.time() - start_time
    print(f"URL processing time: {url_processing_time:.4f} seconds")
    start_time = time.time()

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    sentences = page.text.split('.')

    tokens = []
    page_text = ""

    for sentence in sentences:
        tokens.extend(tokenizer.tokenize(sentence))

        # Trim the text to a maximum of 3000 tokens
        if len(tokens) > 3000:
            break

        page_text += sentence + ". "

    # Delete the trailing space
    page_text = page_text.strip()
    num_tokens = len(tokens)

    tokenization_time = time.time() - start_time
    print(f"Tokenization time: {tokenization_time:.4f} seconds")
    if num_tokens > 10:
        # Define the Azure OpenAI context
        openai.api_type = "azure"
        openai.api_base = azure_endpoint
        openai.api_version = "2023-03-15-preview"
        openai.api_key = contraseña

        # Get the response from the Azure OpenAI API
        start_time = time.time()
        try:
            response = openai.ChatCompletion.create(
                engine="gpt-35-turbo-version-0301",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": request + "\n\nText:\n\n" + page_text}
                ],
                max_tokens=1024,
                temperature=temp,
                top_p=1.0,
            )

            # Get the response text and the token usage
            response_text = response['choices'][0]['message']['content']
            total_tokens = response["usage"]["total_tokens"]

            # Clean the response text
            response_text = re.sub(r'\s+', ' ', response_text)
            response_text = f"#### [{page.title}]({page_url})\n\n{response_text.strip()}"
            total_tokens_str = str(total_tokens) + " (${:.2f} USD)".format(total_tokens / 1000 * 0.002)

            api_processing_time = time.time() - start_time
            print(f"API processing time: {api_processing_time:.4f} seconds")

            return page.text, response_text, total_tokens_str
        except Exception as e:
            return page.text, f"--- An error occurred while processing the request: {e} ---", str(num_tokens)

    return page.text, "--- Check the API key or the minimum number of tokens ---", str(num_tokens)
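The call above relies on the legacy openai 0.x Azure interface (openai.api_type = "azure" together with openai.ChatCompletion.create), which was removed in openai 1.0. If the Space is built against a newer openai release, a roughly equivalent call would use the AzureOpenAI client, as in the sketch below; the deployment name and API version are simply carried over from the code above, and the helper itself is illustrative and not wired into the app.

def _chat_completion_v1_sketch(azure_endpoint, api_key, system_role, user_content, temp):
    # Illustrative only: requires openai >= 1.0; the deployment name and API
    # version are assumptions taken from the legacy call above.
    from openai import AzureOpenAI

    client = AzureOpenAI(
        azure_endpoint=azure_endpoint,
        api_key=api_key,
        api_version="2023-03-15-preview",
    )
    completion = client.chat.completions.create(
        model="gpt-35-turbo-version-0301",  # Azure deployment name
        messages=[
            {"role": "system", "content": system_role},
            {"role": "user", "content": user_content},
        ],
        max_tokens=1024,
        temperature=temp,
        top_p=1.0,
    )
    return completion.choices[0].message.content, completion.usage.total_tokens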
# Define the Gradio interface
iface = gr.Interface(
    fn=text_prompt,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter the Article's URL here...", label="Article's URL to analyse:", type="text"),
        gr.Textbox(lines=1, placeholder="Enter the MSFT Azure OpenAI endpoint here...", label="Azure endpoint:", type="text"),
        gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
        gr.Slider(0.0, 1.0, value=0.3, label="Temperature (0-1):")
    ],
    outputs=[
        gr.Textbox(label="Text from URL:"),
        gr.Markdown(label="Output from GPT:"),
        gr.Markdown(label="Total Tokens:")
    ],
    title="ChatGPT - KYC from URL",
    description="""This tool generates the points of a KYC report from the text retrieved from a URL, using the [gpt-3.5-turbo] engine of the MSFT Azure OpenAI Service.
Provide the URL for text retrieval, your endpoint, your API key and the temperature to use when processing the text."""
)
# Capture errors when the interface is integrated as a component
error_message = ""
try:
    iface.queue(concurrency_count=5)
    iface.launch()
except Exception as e:
    error_message = "An error occurred: " + str(e)
    iface.outputs[1].value = error_message
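For quick local testing outside the Gradio UI, text_prompt can also be called directly. The following is a minimal sketch in which the article URL, the Azure endpoint and the API key are placeholders that must be replaced with real values before running.

if __name__ == "__main__":
    # Minimal local-testing sketch (not part of the Space): all three values
    # below are placeholders, not real credentials or a real article.
    article_text, report_markdown, tokens_used = text_prompt(
        page_url="https://example.com/news-article",
        azure_endpoint="https://YOUR-RESOURCE.openai.azure.com/",
        contraseña="YOUR-AZURE-OPENAI-KEY",
        temp=0.3,
    )
    print(report_markdown)
    print(tokens_used)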