8testiaa3

Sleeping

8testiaa3 / app.py

Duplicate from chinhon/headline_writer

7f95732 almost 2 years ago

1.65 kB

	import gradio as gr
	import re

	from transformers import (
	AutoTokenizer,
	AutoModelForSeq2SeqLM,
	)

	def clean_text(text):
	    """Normalise raw article text before it is tokenised.

	    The steps run in this order: drop every non-ASCII character, remove
	    ``http...`` URLs, turn newlines and tabs into spaces, blank out the
	    literal marker ``ADVERTISEMENT``, then collapse runs of spaces and
	    trim surrounding whitespace.

	    Args:
	        text: The raw text to clean.

	    Returns:
	        The cleaned ASCII string, free of newlines and tabs (possibly
	        empty).
	    """
	    # Drop everything outside ASCII (e.g. Chinese characters).
	    text = text.encode("ascii", errors="ignore").decode("ascii")
	    # A URL is anything from "http" up to the next whitespace character.
	    text = re.sub(r"http\S+", "", text)
	    # One pass covers newlines and tabs; a separate "\n\n" substitution
	    # could never match once single newlines have been replaced.
	    text = re.sub(r"[\n\t]", " ", text)
	    text = re.sub(r"ADVERTISEMENT", " ", text)
	    # Collapse runs of spaces; the final strip() also trims both ends, so
	    # no earlier strip(" ") is needed.
	    return re.sub(r" +", " ", text).strip()


	# Checkpoint identifier handed to from_pretrained() by headline_writer
	# (looks like a Hugging Face Hub repo id -- confirm).
	model_name = "chinhon/pegasus-newsroom-headline_writer_57k"

	# Holds the loaded (tokenizer, model) pair so the checkpoint is read once
	# per process instead of on every request.
	_HEADLINE_MODEL_CACHE = {}


	def _load_headline_model():
	    """Return the ``(tokenizer, model)`` pair for ``model_name``, loading it on first use."""
	    if model_name not in _HEADLINE_MODEL_CACHE:
	        _HEADLINE_MODEL_CACHE[model_name] = (
	            AutoTokenizer.from_pretrained(model_name),
	            AutoModelForSeq2SeqLM.from_pretrained(model_name),
	        )
	    return _HEADLINE_MODEL_CACHE[model_name]


	def headline_writer(text):
	    """Generate a headline for a news story.

	    The text is cleaned with ``clean_text``, tokenised (truncated to the
	    tokenizer's limit), passed through ``model.generate`` with the model's
	    default generation settings, and the first decoded sequence is
	    returned.

	    Args:
	        text: The opening paragraphs of the news story.

	    Returns:
	        The suggested headline, or an empty string when nothing is left of
	        the input after cleaning.
	    """
	    input_text = clean_text(text)
	    if not input_text:
	        # Nothing to summarise; skip the model call entirely.
	        return ""

	    tokenizer, model = _load_headline_model()

	    # Encode as ordinary model input. as_target_tokenizer() is intended for
	    # encoding labels, not source text, so it is not used here.
	    batch = tokenizer(
	        input_text,
	        truncation=True,
	        padding="longest",
	        return_tensors="pt",
	    )

	    raw_write = model.generate(**batch)

	    # min_length / length_penalty are generate() options and had no effect
	    # when passed to batch_decode(), so they are dropped to keep the output
	    # unchanged.
	    # NOTE(review): pass them to generate() if length control was intended.
	    headline = tokenizer.batch_decode(raw_write, skip_special_tokens=True)

	    return headline[0]


	# Input and output widgets are built first so the Interface call stays short.
	# NOTE(review): gr.inputs / gr.outputs, theme="darkdefault" and
	# launch(enable_queue=...) look like legacy Gradio APIs -- confirm the
	# pinned Gradio version before upgrading.
	story_box = gr.inputs.Textbox(
	    lines=20,
	    label="Paste the first few paras of your news story here",
	)
	headline_box = gr.outputs.Textbox(label="Suggested Headline")

	gradio_ui = gr.Interface(
	    fn=headline_writer,
	    inputs=story_box,
	    outputs=headline_box,
	    title="Generate News Headlines with AI",
	    description="Too busy or tired to write a headline? Try this instead.",
	    theme="darkdefault",
	)

	# Start the web app with request queueing switched on.
	gradio_ui.launch(enable_queue=True)