"""Gradio app that generates a headline for a piece of text.

The script loads a sequence-to-sequence model and its tokenizer, exposes a
single text box through a Gradio interface and returns the generated
headline for whatever text is submitted.
"""

# Importing the necessary libraries
import re

import gradio as gr
import nltk
from nltk.tokenize import sent_tokenize
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Name of the pretrained checkpoint used for both the tokenizer and the model
MODEL_NAME = "valurank/final_headline_generator"

nltk.download("punkt")


def read_in_text(url):
    """Read a text file and return its whole content as a string.

    Args:
        url: Path of the file to open for reading.

    Returns:
        The content of the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(url, "r") as file:
        article = file.read()
    return article


def clean_text(text):
    """Apply basic text preprocessing.

    Non-ASCII characters are dropped, newlines and tabs are replaced by
    spaces, runs of spaces are collapsed into one and surrounding whitespace
    is removed.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text.
    """
    # Remove every character that cannot be represented in ASCII
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # Newlines and tabs become plain spaces so they can be collapsed below
    text = re.sub(r"[\n\t]", " ", text)
    # Get rid of multiple spaces and replace them with a single one
    return re.sub(" +", " ", text).strip()


# Initializing the tokenizer and the model for the headline generator
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def headline_generator_2(file):
    """Generate a headline for the given text.

    Args:
        file: The text to summarise into a headline. Despite the name, this
            is the string submitted through the interface's text box.

    Returns:
        The generated headline, or an empty string when no text was given.
    """
    input_text = file
    # Nothing to summarise: skip the model rather than decode an arbitrary
    # headline from an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    # truncation=True cuts over-long input instead of failing on it
    inputs = tokenizer(input_text, truncation=True, return_tensors="pt")
    summary_ids = model.generate(
        inputs["input_ids"], min_length=20, max_length=40
    )
    # A single text was encoded, so only the first decoded sequence is used
    summary = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return summary


# Creating an interface for the headline generator using gradio
demo = gr.Interface(
    headline_generator_2,
    inputs=[gr.Textbox(label="Drop your .txt file here")],
    title="HEADLINE GENERATOR",
    outputs=[gr.Textbox(label="Headline")],
    theme="darkhuggingface",
)

# Launching the app
if __name__ == "__main__":
    demo.launch(debug=True)