chinhon committed on
Commit
4f40928
1 Parent(s): 9151ecd

Oct 25 eng n malay hds

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ from transformers import (
4
+ AutoTokenizer,
5
+ AutoModelForSeq2SeqLM,
6
+ )
7
+
8
# Ad-slot labels that tend to come along when text is copied from news sites.
_AD_MARKERS = re.compile(r"ADVERTISEMENT|ADVERTISING")
# Any run of whitespace: spaces, tabs, newlines, carriage returns, form feeds...
_WHITESPACE_RUN = re.compile(r"\s+")


def clean_text(text):
    """Normalise pasted article text into a single clean ASCII line.

    Steps, in order:
      1. Drop every non-ASCII character (this also removes Chinese text).
      2. Replace the literal ad markers "ADVERTISEMENT" / "ADVERTISING"
         with a space.
      3. Collapse every run of whitespace into one space and trim the ends.

    Args:
        text: Raw text to clean. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text, or ``""`` when there is nothing to clean.
    """
    if not text:
        return ""

    ascii_only = text.encode("ascii", errors="ignore").decode("ascii")
    without_ads = _AD_MARKERS.sub(" ", ascii_only)
    # A single `\s+` pass covers "\n", "\t" and repeated spaces, and also the
    # "\r" left behind by CRLF line endings, which a space-only collapse misses.
    return _WHITESPACE_RUN.sub(" ", without_ads).strip()
22
+
23
# Hugging Face checkpoint used for each headline category offered in the UI.
_HEADLINE_MODELS = {
    "Singapore News": "chinhon/pegasus-newsroom-headline_writer_oct22",
    "International News": "chinhon/pegasus-newsroom_wires_hdwriter42k",
    "Commentary": "chinhon/bart-large-commentaries_hdwriter",
    "News in Malay": "chinhon/pegasus-newsroom-malay_headlines",
}
# Used when the category is missing or not recognised.
_DEFAULT_HEADLINE_MODEL = "chinhon/pegasus-newsroom-headline_writer_oct22"

# Single-slot cache holding the most recently used tokenizer/model pair, so
# consecutive requests for the same category do not reload the checkpoint.
_loaded_checkpoint = {"name": None, "tokenizer": None, "model": None}


def _load_checkpoint(model_name):
    """Return ``(tokenizer, model)`` for *model_name*, reusing the cached pair."""
    if _loaded_checkpoint["name"] != model_name:
        # Release the previous pair before loading the next one so that two
        # checkpoints are never kept alive by the cache at the same time.
        _loaded_checkpoint.update(name=None, tokenizer=None, model=None)
        _loaded_checkpoint["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
        _loaded_checkpoint["model"] = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        # Record the name last: a failed load leaves the slot marked empty.
        _loaded_checkpoint["name"] = model_name
    return _loaded_checkpoint["tokenizer"], _loaded_checkpoint["model"]


def newsroom_hd(hdchoice, text):
    """Generate a headline for *text* with the model matching *hdchoice*.

    Args:
        hdchoice: Headline category. One of "Singapore News",
            "International News", "Commentary" or "News in Malay"; any other
            value falls back to the Singapore News model.
        text: Article text to summarise into a headline.

    Returns:
        The generated headline, or ``""`` when *text* is empty after cleaning.
    """
    model_name = _HEADLINE_MODELS.get(hdchoice, _DEFAULT_HEADLINE_MODEL)

    input_text = clean_text(text) if text else ""
    if not input_text:
        # Nothing to summarise; skip loading a model and generating from
        # an empty prompt.
        return ""

    tokenizer, model = _load_checkpoint(model_name)

    # The article is the model *input*, so it is encoded with the plain
    # tokenizer call rather than inside `as_target_tokenizer()`, which is
    # intended for encoding labels.
    batch = tokenizer(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )
    generated_ids = model.generate(**batch)
    headlines = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return headlines[0]
55
+
56
# Input widgets, listed in the positional order of newsroom_hd(hdchoice, text).
_headline_type_picker = gr.Dropdown(
    choices=[
        "Singapore News",
        "International News",
        "Commentary",
        "News in Malay",
    ],
    value="Singapore News",
    label="Select the type of headlines you would like to generate",
)
_article_box = gr.Textbox(label="Paste text here")

# Output widget that receives the string returned by newsroom_hd.
_headline_box = gr.Textbox(label="Suggested Headline")

gradio_ui = gr.Interface(
    fn=newsroom_hd,
    inputs=[_headline_type_picker, _article_box],
    outputs=_headline_box,
    title="Generate Newsroom Headlines With AI",
    description="**How to use**: Select the type of headline you wish to generate, paste in a relevant amount of text, and click submit.",
    article="**Note**: Paste in as much text as you think necessary, though there's an automatic cut-off of about 500 words for some models and about 850 words for others. If you copy-and-paste directly from a website, take note to remove unrelated text such as those for advertisements and recommended links.",
)

# Turn on request queuing, then start serving the interface.
gradio_ui.queue().launch()