import gradio as gr
import openai
from newspaper import Article
from newspaper import Config

import re
from transformers import GPT2Tokenizer
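
# Version note (assumption; versions are not pinned in this file):
# openai.ChatCompletion.create and iface.queue(concurrency_count=...) below
# match the pre-1.0 openai SDK and Gradio 3.x; later major releases of both
# libraries removed these APIs.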

# define the text summarizer function
def text_prompt(request, system_role, page_url, api_key, temp):
    try:
        USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

        config = Config()
        config.browser_user_agent = USER_AGENT
        config.request_timeout = 10

        article = Article(page_url, config=config)
        article.download()
        article.parse()
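        # Note: download() raises newspaper's ArticleException on fetch or
        # parse failures; the broad except below also reports invalid URLs.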

    except Exception as e:
        return "", f"--- An error occurred while processing the URL: {e} ---", ""
    
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    # TODO: for Chinese text, the sentence separator is '。'
    sentences = article.text.split('.')
    
    tokens = []
    page_text = ""
    
    for sentence in sentences:
        tokens.extend(tokenizer.tokenize(sentence))

        # Stop once the running count exceeds 3100 tokens; the sentence that
        # crosses the limit is counted in tokens but excluded from page_text.
        if len(tokens) > 3100:
            break
        page_text += sentence + ". "
        
    # Delete the last space
    page_text = page_text.strip()

    num_tokens = len(tokens)
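
    # NOTE: GPT-2 tokenization only approximates the cl100k_base encoding that
    # gpt-3.5-turbo actually uses. If the tiktoken package is installed, an
    # exact count could be used instead (sketch, not wired into this flow):
    #
    #   import tiktoken
    #   enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
    #   num_tokens = len(enc.encode(page_text))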

    if num_tokens > 10 and api_key.startswith("sk-"):
        openai.api_key = api_key
        # get the response from openai API
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_role},
                    {"role": "user", "content": request + "\n\n" + 'Text:\n\n"' + page_text + '\n"'}
                ],
                max_tokens=512,
                temperature=temp,
                top_p=1.0,
            )
            # get the response text
            response_text = response['choices'][0]['message']['content']
            total_tokens = response["usage"]["total_tokens"]

            # clean the response text
            response_text = re.sub(r'\s+', ' ', response_text)
            response_text = f"#### [{page.title}]({article_url})\n\n{response_text.strip()}"
            total_tokens_str = str(total_tokens) + " (${:.4f} USD)".format(total_tokens / 1000 * 0.002)

            return article.text, response_text, total_tokens_str
        except Exception as e:
            return article.text, f"--- An error occurred while processing the request: {e} ---", num_tokens
    return article.text, "--- Check API-Key or Min number of tokens:", str(num_tokens)

# define the gradio interface
iface = gr.Interface(
    fn=text_prompt,
    inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
            gr.Textbox(lines=1, placeholder="Enter your gpt-role description here...", label="GPT Role:", type="text"),
            gr.Textbox(lines=1, placeholder="Enter the Article's URL here...", label="Article's URL to parse:", type="text"),
            gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
            gr.Slider(0.0,1.0, value=0.7, label="Temperature:")
            ],
    outputs=[gr.Textbox(label="Article text:"), gr.Markdown(label="Summary:"), gr.Markdown(label="Total Tokens:")],
    examples=[["请用简体中文生成一段200字的摘要,并提取5个关键词.", "作为新闻编辑", "https://openai.com/blog/planning-for-agi-and-beyond","",0.7],
            ["Generate a summary of the following text. Give me an overview of the main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "Act as a Business Consultant", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
            ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","Act as a Business Consultant", "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
    ],
    title="ChatGPT info extraction from URL",
    description="This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [gpt-3.5-turbo] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 1.800 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the description for the system role, the url for text retrieval, your api-key and temperature to process the text."
)

# capture launch errors so they surface in the logs instead of failing silently
try:
    iface.queue(concurrency_count=20)
    iface.launch()
except Exception as e:
    # The interface cannot render the message after a failed launch, so log it.
    print("An error occurred: " + str(e))