|
|
|
# Standard library
import re

# Third-party
import gradio as gr
import nltk
import numpy as np
import torch
from gradio.mix import Parallel
from nltk.tokenize import sent_tokenize
from transformers import pipeline

# Download the NLTK 'punkt' resource at import time; sent_tokenize is the
# only NLTK function this file imports.
nltk.download('punkt')
|
|
|
|
|
|
|
def read_in_text(url):
    """Return the full text content of the file at ``url``.

    Despite its name, ``url`` is used as a local filesystem path: it is
    passed directly to ``open`` in text mode.

    Args:
        url: Path of the text file to read.

    Returns:
        The entire file content as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Use the parameter itself; there is no module-level ``URL`` to fall
    # back on.
    with open(url, "r") as file:
        return file.read()
|
|
|
|
|
from transformers import BartForConditionalGeneration, BartTokenizer

# The tokenizer and the model are both loaded from the same pretrained
# checkpoint, once, at import time.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
|
|
|
|
|
|
|
def final_summary(file):
    """Summarize an uploaded text file into a bulleted list of sentences.

    The text is split into sentences, the sentences are grouped ten at a
    time, and each group is summarized with the module-level BART ``model``
    and ``tokenizer``. The partial summaries are joined, re-split into
    sentences, and each sentence becomes one ``"* "``-prefixed line.

    Args:
        file: Object whose ``name`` attribute is the path of the text file
            to summarize.

    Returns:
        The bullet-point summary as a single newline-separated string.
    """
    sentences_per_chunk = 10
    max_input_tokens = 1024

    text = read_in_text(file.name)
    sentences = sent_tokenize(text)
    partial_summaries = []

    for start in range(0, len(sentences), sentences_per_chunk):
        chunk = ' '.join(sentences[start:start + sentences_per_chunk])
        # max_length is only enforced when truncation is enabled; without
        # it an over-long chunk would be passed to the model untruncated.
        inputs = tokenizer(
            chunk,
            max_length=max_input_tokens,
            truncation=True,
            return_tensors="pt",
        )
        summary_ids = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
        )
        decoded = tokenizer.batch_decode(
            summary_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        partial_summaries.append(decoded[0])

    summary = " ".join(partial_summaries)

    # One bullet per summary sentence; also drop the space left before a
    # period.
    bullets = [
        "* " + sentence.strip().replace(" .", ".")
        for sentence in sent_tokenize(summary)
    ]
    return "\n".join(bullets)
|
|
|
|
|
# Gradio UI: one uploaded file in, the bullet-point summary text out.
demo = gr.Interface(
    fn=final_summary,
    inputs=[gr.inputs.File(label="Drop your .txt file here", optional=False)],
    outputs=[gr.outputs.Textbox(label="Summary")],
    title="ARTICLE SUMMARIZER",
    theme="darkhuggingface",
)


if __name__ == "__main__":
    # Launch the interface only when this file is executed as a script.
    demo.launch(debug=True)