chinhon committed on
Commit
6b309c0
1 Parent(s): 64ae0cc

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -63
app.py DELETED
@@ -1,63 +0,0 @@
1
- import gradio as gr
2
- import re
3
-
4
- from transformers import (
5
- AutoTokenizer,
6
- AutoModelForSeq2SeqLM,
7
- )
8
-
9
- def clean_text(text):
10
- text = text.encode("ascii", errors="ignore").decode(
11
- "ascii"
12
- ) # remove non-ascii, Chinese characters
13
- text = re.sub(r"http\S+", "", text)
14
- text = re.sub(r"\n", " ", text)
15
- text = re.sub(r"\n\n", " ", text)
16
- text = re.sub(r"\t", " ", text)
17
- text = text.strip(" ")
18
- text = re.sub(
19
- " +", " ", text
20
- ).strip() # get rid of multiple spaces and replace with a single
21
- return text
22
-
23
-
24
- model_name = "chinhon/pegasus-newsroom-malay_headlines"
25
-
26
- def headline_writer(text):
27
- input_text = clean_text(text)
28
-
29
- tokenizer = AutoTokenizer.from_pretrained(model_name)
30
-
31
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
32
-
33
- with tokenizer.as_target_tokenizer():
34
- batch = tokenizer(
35
- input_text,
36
- truncation=True,
37
- max_length=1024,
38
- padding="longest",
39
- return_tensors="pt",
40
- )
41
-
42
- raw_write = model.generate(**batch)
43
-
44
- headline = tokenizer.batch_decode(
45
- raw_write, skip_special_tokens=True, min_length=100, length_penalty=100.1
46
- )
47
-
48
- return headline[0]
49
-
50
-
51
- gradio_ui = gr.Interface(
52
- fn=headline_writer,
53
- title="Malay News Headlines Generator",
54
- description="Too busy or tired to write a headline? Try this instead.",
55
- inputs=gr.inputs.Textbox(
56
- lines=20, label="Paste the first few paras of a Malay language news story here"
57
- ),
58
- outputs=gr.outputs.Textbox(label="Suggested Headline"),
59
- theme="darkdefault"
60
- )
61
-
62
-
63
- gradio_ui.launch()