# Spaces:
# Runtime error
#importing the necessary libraries | |
import re | |
import nltk | |
from nltk.tokenize import sent_tokenize | |
nltk.download("punkt") | |
import gradio as gr | |
def read_in_text(url):
    """Read a text file and return its full contents.

    Args:
        url: Path of the text file to read. Despite the name, the value is
            passed straight to ``open``, so it must be a filesystem path.

    Returns:
        The entire contents of the file as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Use the ``url`` parameter; the previous body referenced an undefined
    # name ``URL`` and raised NameError on every call.
    with open(url, "r") as file:
        return file.read()
def clean_text(text):
    """Normalise raw text into a single line of ASCII.

    Characters that cannot be encoded as ASCII are dropped, every run of
    whitespace (spaces, tabs, newlines, carriage returns, ...) is collapsed
    to a single space, and leading/trailing whitespace is removed.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text; an empty string if nothing but whitespace or
        non-ASCII characters was supplied.
    """
    # Drop every character outside the ASCII range.
    ascii_only = text.encode("ascii", errors="ignore").decode("ascii")
    # split() with no argument splits on any whitespace run and discards
    # leading/trailing whitespace, so re-joining with one space replaces the
    # former chain of substitutions and also handles "\r" from CRLF input.
    return " ".join(ascii_only.split())
#importing the model and tokenizer for the headline generator | |
from transformers import ( | |
AutoTokenizer, | |
AutoModelForSeq2SeqLM, | |
) | |
# Load the tokenizer and the seq2seq model from the same pretrained checkpoint.
HEADLINE_MODEL_NAME = "valurank/final_headline_generator"
tokenizer = AutoTokenizer.from_pretrained(HEADLINE_MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(HEADLINE_MODEL_NAME)
def headline_generator_2(file):
    """Generate a headline for the given article text.

    Args:
        file: The article text as a string. The name is historical: the
            Gradio interface in this file passes the contents of a textbox,
            not a file object.

    Returns:
        The generated headline, or an empty string when no text was
        supplied (``None``, empty, or whitespace-only input).
    """
    # Nothing to summarise: return early instead of handing empty or None
    # input to the tokenizer and model.
    if not file or not file.strip():
        return ""

    # truncation=True cuts over-long input down to the tokenizer's limit.
    inputs = tokenizer(file, truncation=True, return_tensors="pt")
    summary_ids = model.generate(
        inputs["input_ids"], min_length=20, max_length=40
    )
    decoded = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # A single text was passed in, so the first decoded entry is the headline.
    return decoded[0]
# Build the Gradio interface for the headline generator:
# one text input for the article, one text output for the headline.
demo = gr.Interface(
    fn=headline_generator_2,
    inputs=[gr.Textbox(label="Drop your .txt file here")],
    outputs=[gr.Textbox(label="Headline")],
    title="HEADLINE GENERATOR",
    theme="darkhuggingface",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)