Spaces:
Sleeping
Sleeping
liamvbetts
committed on
Commit
•
caaf3b3
1
Parent(s):
7218a55
searchable
Browse files- app.py +26 -34
- requirements.txt +0 -1
app.py
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
import random
|
3 |
-
from datasets import load_dataset
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
6 |
import os
|
7 |
|
8 |
-
dataset = load_dataset("cnn_dailymail", "3.0.0")
|
9 |
-
|
10 |
NEWS_API_KEY = os.environ['NEWS_API_KEY']
|
11 |
HF_TOKEN = os.environ['HF_TOKEN']
|
12 |
|
@@ -17,12 +14,9 @@ def summarize(model_name, article):
|
|
17 |
payload = {"inputs": article}
|
18 |
response = requests.post(API_URL, headers=headers, json=payload)
|
19 |
|
20 |
-
# Check if the response is successful
|
21 |
if response.status_code == 200:
|
22 |
-
# Assuming the response structure has a 'generated_text' field
|
23 |
return format(response.json())
|
24 |
else:
|
25 |
-
# Handle different types of errors
|
26 |
if response.status_code == 401:
|
27 |
return "Error: Unauthorized. Check your API token."
|
28 |
elif response.status_code == 503:
|
@@ -34,23 +28,23 @@ def summarize(model_name, article):
|
|
34 |
def format(response):
|
35 |
return response[0]['generated_text']
|
36 |
|
37 |
-
def
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
def get_news_article():
|
47 |
-
url = 'https://newsapi.org/v2/top-headlines'
|
48 |
-
news_url = ''
|
49 |
-
params = {
|
50 |
-
'apiKey': NEWS_API_KEY,
|
51 |
-
'country': 'us', # You can change this as needed
|
52 |
-
'pageSize': 100
|
53 |
-
}
|
54 |
response = requests.get(url, params=params)
|
55 |
articles = response.json().get('articles', [])
|
56 |
if articles:
|
@@ -58,7 +52,7 @@ def get_news_article():
|
|
58 |
news_url = random_article.get('url')
|
59 |
else:
|
60 |
return None
|
61 |
-
|
62 |
if news_url:
|
63 |
full_article, title = scrape_article(news_url)
|
64 |
return full_article, title
|
@@ -70,36 +64,34 @@ def scrape_article(url):
|
|
70 |
response = requests.get(url)
|
71 |
soup = BeautifulSoup(response.content, 'html.parser')
|
72 |
|
73 |
-
# Extracting the title - this is a general approach
|
74 |
title = soup.title.string if soup.title else "No Title Available"
|
75 |
|
76 |
-
article_content = soup.find_all('p')
|
77 |
|
78 |
text = ' '.join([p.get_text() for p in article_content])
|
79 |
words = text.split()
|
80 |
-
truncated_text = ' '.join(words[:512])
|
81 |
-
|
82 |
return truncated_text, title
|
83 |
except Exception as e:
|
84 |
return "Error scraping article: " + str(e), ""
|
85 |
|
86 |
with gr.Blocks() as demo:
|
87 |
gr.Markdown("# News Summary App")
|
88 |
-
gr.Markdown("Enter a news text
|
89 |
|
90 |
with gr.Row():
|
91 |
with gr.Column():
|
92 |
-
|
93 |
-
load_news_article_button = gr.Button("
|
94 |
-
article_title = gr.Label()
|
95 |
-
input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text
|
96 |
with gr.Column():
|
97 |
model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
|
98 |
summarize_button = gr.Button("Summarize")
|
99 |
output_text = gr.Textbox(label="Summary", placeholder="Summary will appear here...")
|
100 |
|
101 |
-
|
102 |
-
load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
|
103 |
summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)
|
104 |
|
105 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import random
|
|
|
3 |
import requests
|
4 |
from bs4 import BeautifulSoup
|
5 |
import os
|
6 |
|
|
|
|
|
7 |
NEWS_API_KEY = os.environ['NEWS_API_KEY']
|
8 |
HF_TOKEN = os.environ['HF_TOKEN']
|
9 |
|
|
|
14 |
payload = {"inputs": article}
|
15 |
response = requests.post(API_URL, headers=headers, json=payload)
|
16 |
|
|
|
17 |
if response.status_code == 200:
|
|
|
18 |
return format(response.json())
|
19 |
else:
|
|
|
20 |
if response.status_code == 401:
|
21 |
return "Error: Unauthorized. Check your API token."
|
22 |
elif response.status_code == 503:
|
|
|
28 |
def format(response):
    """Pull the generated text out of a decoded inference-API JSON payload.

    Args:
        response: Decoded JSON body of a successful model call. The expected
            shape is a non-empty list whose first element is a dict holding
            the model output.

    Returns:
        The string stored under ``generated_text`` in the first element. If
        that key is absent, ``summary_text`` is tried (NOTE(review): this is
        the key summarization pipelines are documented to use -- confirm for
        the deployed models). For any other payload shape an ``"Error: ..."``
        string is returned instead of raising, matching how ``summarize``
        reports failures.
    """
    # NOTE: the name shadows the built-in ``format``; it is kept because
    # ``summarize`` calls this helper by that name.
    if isinstance(response, list) and response and isinstance(response[0], dict):
        first_item = response[0]
        for key in ('generated_text', 'summary_text'):
            if key in first_item:
                return first_item[key]
    # Empty list, error dict, or missing keys: surface a readable message
    # rather than an IndexError/KeyError/TypeError in the UI callback.
    return "Error: Unexpected response format: " + repr(response)
|
30 |
|
31 |
+
def get_news_article(search_query):
|
32 |
+
if search_query.strip():
|
33 |
+
url = 'https://newsapi.org/v2/everything'
|
34 |
+
params = {
|
35 |
+
'apiKey': NEWS_API_KEY,
|
36 |
+
'q': search_query,
|
37 |
+
'pageSize': 100,
|
38 |
+
'language': 'en'
|
39 |
+
}
|
40 |
+
else:
|
41 |
+
url = 'https://newsapi.org/v2/top-headlines'
|
42 |
+
params = {
|
43 |
+
'apiKey': NEWS_API_KEY,
|
44 |
+
'country': 'us',
|
45 |
+
'pageSize': 100
|
46 |
+
}
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
response = requests.get(url, params=params)
|
49 |
articles = response.json().get('articles', [])
|
50 |
if articles:
|
|
|
52 |
news_url = random_article.get('url')
|
53 |
else:
|
54 |
return None
|
55 |
+
|
56 |
if news_url:
|
57 |
full_article, title = scrape_article(news_url)
|
58 |
return full_article, title
|
|
|
64 |
response = requests.get(url)
|
65 |
soup = BeautifulSoup(response.content, 'html.parser')
|
66 |
|
|
|
67 |
title = soup.title.string if soup.title else "No Title Available"
|
68 |
|
69 |
+
article_content = soup.find_all('p')
|
70 |
|
71 |
text = ' '.join([p.get_text() for p in article_content])
|
72 |
words = text.split()
|
73 |
+
truncated_text = ' '.join(words[:512])
|
74 |
+
|
75 |
return truncated_text, title
|
76 |
except Exception as e:
|
77 |
return "Error scraping article: " + str(e), ""
|
78 |
|
79 |
# Build the Gradio UI: a search/load column, a model/summary column, and the
# two button callbacks that connect them to get_news_article and summarize.
with gr.Blocks() as demo:
    # Page heading followed by a one-line usage hint.
    gr.Markdown("# News Summary App")
    gr.Markdown("Enter a news text, search for news articles, or load a random article.")

    with gr.Row():
        # First column: look up an article (or paste one) to be summarized.
        with gr.Column():
            search_query_input = gr.Textbox(
                label="Search for News",
                placeholder="Enter a topic to search...",
            )
            load_news_article_button = gr.Button("Search News Article")
            article_title = gr.Label()
            input_text = gr.Textbox(
                lines=10,
                label="Input Text",
                placeholder="Enter article text, load a random article, or search for news...",
            )

        # Second column: pick a model, trigger it, and display its output.
        with gr.Column():
            model_name = gr.Dropdown(
                label="Model Name",
                choices=[
                    "liamvbetts/bart-news-summary-v1",
                    "liamvbetts/bart-base-cnn-v1",
                    "liamvbetts/bart-large-cnn-v2",
                    "liamvbetts/bart-large-cnn-v4",
                ],
                value="liamvbetts/bart-news-summary-v1",
            )
            summarize_button = gr.Button("Summarize")
            output_text = gr.Textbox(
                label="Summary",
                placeholder="Summary will appear here...",
            )

    # Event listeners are registered while the Blocks context is still open.
    # The search button passes the query text to get_news_article and fills
    # the article box and the title label from its two return values.
    load_news_article_button.click(
        fn=get_news_article,
        inputs=[search_query_input],
        outputs=[input_text, article_title],
    )
    # The summarize button sends the chosen model and the article text to
    # summarize and shows the result in the summary box.
    summarize_button.click(
        fn=summarize,
        inputs=[model_name, input_text],
        outputs=output_text,
    )

demo.launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,2 @@
|
|
1 |
gradio
|
2 |
-
datasets
|
3 |
beautifulsoup4
|
|
|
1 |
gradio
|
|
|
2 |
beautifulsoup4
|