# Spaces status: Runtime error
# Import the necessary libraries.
import re | |
import nltk | |
from nltk.tokenize import sent_tokenize | |
nltk.download('punkt') | |
import gradio as gr | |
from gradio.mix import Parallel | |
# Read the uploaded article from disk.
def read_in_text(url):
    """Return the full contents of the text file at ``url``.

    Args:
        url: Filesystem path of the text file. Despite the name, the value is
            passed straight to ``open`` and is not fetched over the network.

    Returns:
        The file's contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the host's
    # locale-derived default encoding.
    with open(url, "r", encoding="utf-8") as file:
        return file.read()
# Light text preprocessing; more may still be needed later.
def clean_text(url):
    """Load the article at ``url`` and normalise it into a single line of text.

    Steps, in order: drop non-ASCII characters, remove byline fragments, then
    collapse newlines, tabs and repeated spaces into single spaces.

    Args:
        url: Path of the text file, handed unchanged to ``read_in_text``.

    Returns:
        The cleaned article text with no leading or trailing whitespace.
    """
    text = read_in_text(url)

    # Remove every character that cannot be represented in ASCII.
    text = text.encode("ascii", errors="ignore").decode("ascii")

    # Strip bylines of the form "by <names> - dd/dd/dd h:mm XX XX" or
    # "by <names> dd, yyyy". Raw strings keep the regex escapes (\s, \d, \/)
    # from being parsed as invalid string escape sequences.
    text = re.sub(
        r"(by[\s\w,|]+ - \d\d\/\d\d\/\d\d\s\d+:\d+\s\w{2}\s\w{2})"
        r"|(by[\s\w|,]+\d\d,\s\d{4})",
        "",
        text,
    )

    # Any run of newlines, tabs and spaces becomes one space. This is
    # equivalent to replacing each newline/tab with a space and then
    # collapsing repeated spaces.
    text = re.sub(r"[\n\t ]+", " ", text)
    return text.strip()
# Import the model and tokenizer classes for the headline generator.
from transformers import ( | |
AutoTokenizer, | |
AutoModelForSeq2SeqLM, | |
) | |
# Load the pretrained headline checkpoint once, at import time: first the
# tokenizer, then the sequence-to-sequence model, both from the same name.
model_type_2 = "chinhon/pegasus-newsroom-headline_writer"
tokenizer_2 = AutoTokenizer.from_pretrained(model_type_2)
model_2 = AutoModelForSeq2SeqLM.from_pretrained(model_type_2)
# Generate a headline for an uploaded article.
def headline_generator_2(file):
    """Generate a headline for the article contained in an uploaded text file.

    Args:
        file: The uploaded file, either as an object exposing its path via
            ``.name`` or as the path itself.

    Returns:
        The generated headline, or an empty string when the cleaned article
        contains no text.
    """
    # Accept both a file wrapper carrying ``.name`` and a bare path.
    path = getattr(file, "name", file)
    input_text = clean_text(path)
    if not input_text:
        # Nothing to summarise; skip the model call entirely.
        return ""

    # Encode the article as model *input*. ``as_target_tokenizer`` is meant
    # for encoding labels and is deprecated, so it is not used here.
    batch = tokenizer_2(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    # The length cap belongs to generation; ``batch_decode`` ignores it.
    translated = model_2.generate(**batch, max_length=100)
    summary_2 = tokenizer_2.batch_decode(translated, skip_special_tokens=True)
    return summary_2[0]
# Gradio front end: one uploaded text file in, one headline text box out.
demo = gr.Interface(
    fn=headline_generator_2,
    inputs=[gr.inputs.File(label="Drop your .txt file here", optional=False)],
    outputs=[gr.outputs.Textbox(label="Headline")],
    title="HEADLINE GENERATOR",
    theme="darkhuggingface",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)