File size: 2,532 Bytes
b3118cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c70d408
b3118cd
 
 
 
 
 
 
922d2a0
b3118cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c70d408
b3118cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import functools
import re

import gradio as gr
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM
)

def clean_text(text: str) -> str:
    """Normalize raw article text before it is passed to a summarizer.

    Steps, in order:
      1. Drop every non-ASCII character.
      2. Replace each newline, tab and the literal marker ``ADVERTISEMENT``
         with a single space.
      3. Collapse runs of spaces into one and trim surrounding whitespace.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string; empty if nothing survives the cleaning.
    """
    # Round-tripping through ASCII with errors="ignore" silently discards
    # anything outside ASCII (Chinese characters, accents, curly quotes, ...).
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # One pass covers newlines, tabs and the ad marker. A separate "\n\n"
    # rule is unnecessary: every newline is already replaced individually and
    # the resulting double spaces are collapsed below.
    text = re.sub(r"[\n\t]|ADVERTISEMENT", " ", text)
    # Collapse repeated spaces, then trim whitespace at both ends.
    return re.sub(r" +", " ", text).strip()


modchoice_1 = "chinhon/bart-large-cnn_summarizer_30216"


@functools.lru_cache(maxsize=None)
def _load_summarizer1():
    """Load the tokenizer and model named by ``modchoice_1`` exactly once.

    The result is memoized so that repeated requests reuse the same objects
    instead of calling ``from_pretrained`` again on every summary.
    NOTE(review): the loaded model stays resident in memory for the lifetime
    of the process -- confirm the host has room for it.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_1)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_1)
    return tokenizer, model


def summarizer1(text):
    """Summarize ``text`` with the ``modchoice_1`` model as a bullet list.

    The input is cleaned with ``clean_text``, tokenized (truncated to the
    tokenizer's limit), run through ``generate`` and decoded. The decoded
    summary is split on ``". "`` and each piece is prefixed with ``"* "``.

    Args:
        text: The article text to summarize.

    Returns:
        The bullet-formatted summary, one ``"* "`` line per piece, or an
        empty string when the cleaned input is empty.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing to summarize; skip loading and running the model.
        return ""

    tokenizer, model = _load_summarizer1()

    # The article is the *source* sequence, so it is tokenized directly.
    # ``as_target_tokenizer()`` is meant for encoding labels and is
    # deprecated in recent transformers releases.
    batch = tokenizer(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    generated = model.generate(**batch)
    summary = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]

    # Splitting on ". " drops that separator, so only the final piece keeps
    # whatever punctuation the model emitted at the end.
    return "\n".join("* " + piece for piece in summary.split(". "))


# Gradio interface around summarizer1: one text input, one unlabeled text output.
# NOTE(review): gr.inputs / gr.outputs are legacy component namespaces -- confirm
# the pinned Gradio version still provides them before upgrading.
summary1 = gr.Interface(
    fn=summarizer1, inputs=gr.inputs.Textbox(), outputs=gr.outputs.Textbox(label="")
)


modchoice_2 = "chinhon/pegasus-newsroom-summarizer_30216"


@functools.lru_cache(maxsize=None)
def _load_summarizer2():
    """Load the tokenizer and model named by ``modchoice_2`` exactly once.

    The result is memoized so that repeated requests reuse the same objects
    instead of calling ``from_pretrained`` again on every summary.
    NOTE(review): the loaded model stays resident in memory for the lifetime
    of the process -- confirm the host has room for it.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_2)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_2)
    return tokenizer, model


def summarizer2(text):
    """Summarize ``text`` with the ``modchoice_2`` model as a bullet list.

    The input is cleaned with ``clean_text``, tokenized (truncated to the
    tokenizer's limit), run through ``generate`` and decoded. The decoded
    summary is split on ``". "`` and each piece is prefixed with ``"* "``.

    Args:
        text: The article text to summarize.

    Returns:
        The bullet-formatted summary, one ``"* "`` line per piece, or an
        empty string when the cleaned input is empty.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing to summarize; skip loading and running the model.
        return ""

    tokenizer, model = _load_summarizer2()

    # The article is the *source* sequence, so it is tokenized directly.
    # ``as_target_tokenizer()`` is meant for encoding labels and is
    # deprecated in recent transformers releases.
    batch = tokenizer(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    generated = model.generate(**batch)
    summary = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]

    # Splitting on ". " drops that separator, so only the final piece keeps
    # whatever punctuation the model emitted at the end.
    return "\n".join("* " + piece for piece in summary.split(". "))

# Gradio interface around summarizer2: one text input, one unlabeled text output.
# NOTE(review): gr.inputs / gr.outputs are legacy component namespaces -- confirm
# the pinned Gradio version still provides them before upgrading.
summary2 = gr.Interface(
    fn=summarizer2, inputs=gr.inputs.Textbox(), outputs=gr.outputs.Textbox(label="")
)


# Build the combined UI: both summarizer interfaces are handed to gr.Parallel
# together with a single shared 20-line input textbox, a title and a theme.
# NOTE(review): presumably gr.Parallel feeds the same input to each interface
# and shows the outputs side by side -- confirm against the pinned Gradio docs.
gradio_ui = gr.Parallel(
    summary1,
    summary2,
    title="Compare 2 AI Summarizers",
    inputs=gr.inputs.Textbox(
        lines=20,
        label="Paste your news story here, and choose from 2 suggested summaries",
    ),
    theme="huggingface",
)

# Start the app at import/run time with enable_queue=True.
# NOTE(review): presumably queuing is enabled so slow model calls do not hit
# request timeouts; `enable_queue` is version-dependent -- verify before upgrading.
gradio_ui.launch(enable_queue=True)