# Spaces: Sleeping
import gradio as gr | |
import re | |
from transformers import ( | |
AutoTokenizer, | |
AutoModelForSeq2SeqLM, | |
) | |
def clean_text(text):
    """Normalize pasted article text before it is tokenized.

    Steps, in order:
      1. Drop every non-ASCII character (e.g. Chinese characters).
      2. Replace the literal markers "ADVERTISEMENT" and "ADVERTISING"
         with a space.
      3. Collapse each run of spaces, tabs, carriage returns and newlines
         into a single space and trim the ends.

    Args:
        text: Raw text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned single-line string, or ``""`` when there is no input.
    """
    if not text:
        return ""

    # Round-trip through ASCII with errors="ignore" to discard anything
    # that cannot be represented in ASCII.
    text = text.encode("ascii", errors="ignore").decode("ascii")

    # The markers never contain whitespace, so removing them before the
    # whitespace pass gives the same result as removing them after it.
    text = re.sub(r"ADVERTISEMENT|ADVERTISING", " ", text)

    # One pass handles newlines, tabs, the "\r" left by CRLF pastes, and
    # repeated spaces (including those introduced by the step above).
    return re.sub(r"[ \t\r\n]+", " ", text).strip()
# Maps each dropdown choice to the checkpoint used to write its headline.
_HEADLINE_MODELS = {
    "Singapore News": "chinhon/pegasus-newsroom-headline_writer_oct22",
    "International News": "chinhon/pegasus-newsroom_wires_hdwriter42k",
    "Commentary": "chinhon/bart-large-commentaries_hdwriter",
    "News in Malay": "chinhon/pegasus-newsroom-malay_headlines",
}

# Used when the choice is not one of the keys above.
_DEFAULT_HEADLINE_MODEL = "chinhon/pegasus-newsroom-headline_writer_oct22"

# Holds at most one {checkpoint name: (tokenizer, model)} entry so that
# repeated requests for the same headline type skip the reload, while no
# more than one model is kept in memory at a time.
_loaded_checkpoint = {}


def _load_checkpoint(model_name):
    """Return ``(tokenizer, model)`` for *model_name*, reusing the last load."""
    if model_name not in _loaded_checkpoint:
        # Release the previously held model before loading the next one.
        _loaded_checkpoint.clear()
        _loaded_checkpoint[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForSeq2SeqLM.from_pretrained(model_name),
        )
    return _loaded_checkpoint[model_name]


def newsroom_hd(hdchoice, text):
    """Generate a headline for *text* with the model chosen by *hdchoice*.

    Args:
        hdchoice: Headline type selected in the UI. Unrecognized values
            fall back to the "Singapore News" checkpoint.
        text: Article text to summarize into a headline.

    Returns:
        The first decoded headline, or ``""`` when there is no usable
        text left after cleaning.
    """
    input_text = clean_text(text) if text else ""
    if not input_text:
        # Nothing to summarize; skip loading and running the model.
        return ""

    modchoice = _HEADLINE_MODELS.get(hdchoice, _DEFAULT_HEADLINE_MODEL)
    tokenizer, model = _load_checkpoint(modchoice)

    # Encode the article as the model *input*. The target-tokenizer
    # context manager is meant for encoding labels and is deprecated, so
    # it is intentionally not used here.
    batch = tokenizer(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )
    raw = model.generate(**batch)
    headline = tokenizer.batch_decode(raw, skip_special_tokens=True)
    return headline[0]
# Input widgets: which kind of headline to write, and the article text.
headline_type_input = gr.Dropdown(
    label="Select the type of headlines you would like to generate",
    choices=[
        "Singapore News",
        "International News",
        "Commentary",
        "News in Malay",
    ],
    value="Singapore News",
)
article_text_input = gr.Textbox(label="Paste text here")

# Output widget showing the generated headline.
headline_output = gr.Textbox(label="Suggested Headline")

# Wire the widgets to newsroom_hd; inputs are passed positionally in the
# order listed (headline type first, then article text).
gradio_ui = gr.Interface(
    fn=newsroom_hd,
    inputs=[headline_type_input, article_text_input],
    outputs=headline_output,
    title="Generate Newsroom Headlines With AI",
    description="**How to use**: Select the type of headline you wish to generate, paste in a relevant amount of text, and click submit.",
    article="**Note**: Paste in as much text as you think necessary, though there's an automatic cut-off of about 500 words for some models and about 850 words for others. If you copy-and-paste directly from a website, take note to remove unrelated text such as those for advertisements and recommended links.",
)

# Turn on request queuing, then start serving the app.
gradio_ui.queue().launch()