Spaces:

chinhon
/

newsroom_hdwriter

Sleeping

App Files Files Community

chinhon committed on Oct 25, 2022

Commit

4f40928

•

1 Parent(s): 9151ecd

Oct 25 eng n malay hds

Browse files

Files changed (1) hide show

app.py +78 -0

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import gradio as gr
+import re
+from transformers import (
+    AutoTokenizer,
+    AutoModelForSeq2SeqLM,
+)
+def clean_text(text):
+    text = text.encode("ascii", errors="ignore").decode(
+        "ascii"
+    )  # remove non-ascii, Chinese characters
+    text = re.sub(r"\n", " ", text)
+    text = re.sub(r"\n\n", " ", text)
+    text = re.sub(r"\t", " ", text)
+    text = re.sub(r"ADVERTISEMENT", " ", text)
+    text = re.sub(r"ADVERTISING", " ", text)
+    text = text.strip(" ")
+    text = re.sub(
+        " +", " ", text
+    ).strip()  # get rid of multiple spaces and replace with a single
+    return text
+def newsroom_hd(hdchoice, text):
+    if hdchoice == "Singapore News":
+        modchoice = "chinhon/pegasus-newsroom-headline_writer_oct22"
+    elif hdchoice == "International News":
+        modchoice = "chinhon/pegasus-newsroom_wires_hdwriter42k"
+    elif hdchoice == "Commentary":
+        modchoice = "chinhon/bart-large-commentaries_hdwriter"
+    elif hdchoice == "News in Malay":
+        modchoice = "chinhon/pegasus-newsroom-malay_headlines"
+    else:
+        modchoice = "chinhon/pegasus-newsroom-headline_writer_oct22"
+    input_text = clean_text(text)
+    tokenizer = AutoTokenizer.from_pretrained(modchoice)
+    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice)
+    with tokenizer.as_target_tokenizer():
+        batch = tokenizer(
+            input_text, truncation=True, padding="longest", return_tensors="pt"
+        )
+    raw = model.generate(**batch)
+    headline = tokenizer.batch_decode(raw, skip_special_tokens=True)
+    return headline[0]
+gradio_ui = gr.Interface(
+    fn=newsroom_hd,
+    title="Generate Newsroom Headlines With AI",
+    description="**How to use**: Select the type of headline you wish to generate, paste in a relevant amount of text, and click submit.",
+    article="**Note**: Paste in as much text as you think necessary, though there's an automatic cut-off of about 500 words for some models and about 850 words for others. If you copy-and-paste directly from a website, take note to remove unrelated text such as those for advertisements and recommended links.",
+    inputs=[
+        gr.Dropdown(
+            label="Select the type of headlines you would like to generate",
+            choices=[
+                "Singapore News",
+                "International News",
+                "Commentary",
+                "News in Malay",
+            ],
+            value="Singapore News",
+        ),
+        gr.Textbox(label="Paste text here"),
+    ],
+    outputs=gr.Textbox(label="Suggested Headline"),
+)
+gradio_ui.queue().launch()