# Spaces: Paused
import re

import gradio as gr
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Hugging Face Hub identifier of the summarization checkpoint.
MODEL_NAME = "csebuetnlp/mT5_multilingual_XLSum"

# Compiled once at import time; raw string so ``\s`` is a regex class rather
# than an (invalid) Python string escape.
_WHITESPACE_RE = re.compile(r"\s+")


def WHITESPACE_HANDLER(k):
    """Return ``k`` stripped, with every whitespace run collapsed to one space.

    ``\\s+`` already matches newlines, so a single substitution covers both
    line breaks and ordinary spacing.
    """
    return _WHITESPACE_RE.sub(" ", k.strip())
# Load the tokenizer and the seq2seq model for MODEL_NAME once, at import
# time, so every call to the summarizer reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=MODEL_NAME,
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    pretrained_model_name_or_path=MODEL_NAME,
)
def summarize(text):
    """Return a summary of ``text`` produced by the module-level model.

    The input is whitespace-normalized with ``WHITESPACE_HANDLER``, tokenized
    (truncated to at most 512 tokens) and passed to ``model.generate`` using
    4-beam search, a maximum output length of 84 tokens and no repeated
    2-grams.

    Args:
        text: The text to summarize. ``None``, empty, or whitespace-only
            input is accepted.

    Returns:
        The decoded summary string, or ``""`` when there is nothing to
        summarize.
    """
    # Nothing to summarize: skip tokenization and generation entirely.
    if text is None or not text.strip():
        return ""

    # A batch of one needs no padding; only truncation to the 512-token cap.
    encoding = tokenizer(
        [WHITESPACE_HANDLER(text)],
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )

    # Pass the attention mask explicitly instead of leaving generate() to
    # infer it from the token ids.
    output_ids = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    return tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
# Gradio UI: a text input, a button, and a textbox that shows the summary.
demo = gr.Blocks(title="⭐ Summ4rizer ⭐")
demo.encrypt = False

with demo:
    # Page header with a link to the model card on the Hugging Face Hub.
    gr.Markdown(f'''
<div>
<h1 style='text-align: center'>Text Summarizer</h1>
</div>
<div>
Using summarization Model from <a href='https://huggingface.co/{MODEL_NAME}' target='_blank'><b>{MODEL_NAME}</b></a>.
</div>
''')
    text = gr.Textbox(label="Text here !!", lines=1, interactive=True)
    summarize_btn = gr.Button("Let's Summarize",)
    summarization = gr.Textbox()
    # summarize() returns a single string, so exactly one output component
    # is wired; listing more outputs than return values makes Gradio raise
    # on every click.
    summarize_btn.click(summarize, [text], outputs=[summarization])

demo.launch()