Spaces:

valurank
/

Headline_generator

Runtime error

App Files Files Community

Headline_generator / app.py

abdulmatinomotoso

Update app.py

626a615 over 2 years ago

raw

history blame

2.25 kB

	#importing the necessary libraries

	import re
	import nltk
	from nltk.tokenize import sent_tokenize
	nltk.download('punkt')
	import gradio as gr
	from gradio.mix import Parallel

	# Defining a function to read in the text file
	def read_in_text(url):
	    """Return the entire contents of the text file at ``url``.

	    Args:
	        url: Filesystem path of the file to read (despite the name it is
	            handed straight to ``open``, so it must be a local path).

	    Returns:
	        The file contents as a single ``str``.

	    Raises:
	        OSError: If the file cannot be opened or read.
	    """
	    # Decode explicitly as UTF-8 rather than relying on the platform locale,
	    # and substitute U+FFFD for undecodable bytes so an upload saved in a
	    # legacy encoding does not abort with UnicodeDecodeError.
	    with open(url, "r", encoding="utf-8", errors="replace") as file:
	        return file.read()

	#Doing some text preprocessing, more will still be needed later
	def clean_text(url):
	    """Read the article at ``url`` and return it as one cleaned line of text.

	    Steps, in order: read the file via ``read_in_text``, drop every
	    non-ASCII character, delete text matching the byline pattern, then
	    collapse runs of newlines, tabs and spaces into single spaces and trim
	    surrounding whitespace.

	    Args:
	        url: Path of the text file to load (forwarded to ``read_in_text``).

	    Returns:
	        The cleaned article text as a ``str``.
	    """
	    text = read_in_text(url)

	    # Encoding to ASCII with errors="ignore" silently discards every
	    # character outside the ASCII range.
	    text = text.encode("ascii", errors="ignore").decode("ascii")

	    # Raw string: the pattern is unchanged, but sequences such as \s and \d
	    # are no longer invalid escapes in an ordinary string literal.
	    # NOTE(review): the "\|" between the two groups is a literal pipe, so the
	    # pattern only matches both byline formats joined by "|". If alternation
	    # was intended it should be a bare "|" -- confirm before changing.
	    text = re.sub(
	        r'(by[\s\w,\|]+ - \d\d\/\d\d\/\d\d\s\d+:\d+\s\w{2}\s\w{2})\|(by[\s\w\|,]+\d\d,\s\d{4})',
	        "",
	        text,
	    )

	    # One pass replaces the former newline/tab/space substitutions (the
	    # separate "\n\n" step could never match once every "\n" was replaced).
	    return re.sub(r"[\n\t ]+", " ", text).strip()

	#importing the model and tokenizer for the headline generator
	from transformers import (
	AutoTokenizer,
	AutoModelForSeq2SeqLM,
	)

	#initializing the tokenizer and the model
	# Checkpoint identifier shared by both from_pretrained calls below, so the
	# tokenizer and the model are always resolved from the same name.
	model_type_2 ="chinhon/pegasus-newsroom-headline_writer"
	# Module-level statements: both objects are created once, when this file is
	# imported or run, and are then read as globals by headline_generator_2.
	# (Presumably the first call fetches the files from the Hugging Face Hub.)
	tokenizer_2 = AutoTokenizer.from_pretrained(model_type_2)
	model_2 = AutoModelForSeq2SeqLM.from_pretrained(model_type_2)

	#Defining a function to generate the headlines
	def headline_generator_2(file):
	    """Generate a headline for the article contained in an uploaded file.

	    Args:
	        file: The uploaded file. Either an object exposing the path in a
	            ``name`` attribute, or the path itself.

	    Returns:
	        The first decoded sequence produced by ``model_2`` as a ``str``, or
	        an empty string when the cleaned article contains no text.
	    """
	    # Fall back to the value itself when there is no ``name`` attribute, so a
	    # plain path works as well as a file-like upload object.
	    path = getattr(file, "name", file)
	    input_text = clean_text(path)
	    if not input_text:
	        # Nothing to summarise; skip the model instead of letting it
	        # generate a headline from an empty prompt.
	        return ""

	    # The article is the model *input*, so it is encoded with the regular
	    # tokenizer call; as_target_tokenizer() is intended for encoding labels
	    # and is deprecated in transformers.
	    batch = tokenizer_2(
	        input_text, truncation=True, padding="longest", return_tensors="pt"
	    )

	    # NOTE(review): the original passed max_length=100 to batch_decode, where
	    # it is not a decoding option. If a cap on headline length is wanted it
	    # belongs on generate(); left at the model's defaults pending confirmation.
	    translated = model_2.generate(**batch)
	    summary_2 = tokenizer_2.batch_decode(translated, skip_special_tokens=True)
	    return summary_2[0]

	#creating an interface for the headline generator using gradio
	demo = gr.Interface(
	    fn=headline_generator_2,
	    inputs=[gr.inputs.File(label="Drop your .txt file here", optional=False)],
	    outputs=[gr.outputs.Textbox(label="Headline")],
	    title="HEADLINE GENERATOR",
	    theme="darkhuggingface",
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True)