File size: 2,028 Bytes
f4e5c3c
 
 
 
e0215db
f4e5c3c
e0215db
 
f4e5c3c
e0215db
a67e3e7
f4e5c3c
 
 
 
 
 
 
a67e3e7
a90e403
 
eccf8af
a90e403
 
 
 
 
f4e5c3c
 
664d61b
f4e5c3c
 
 
 
 
 
f622ee3
 
f4e5c3c
 
 
 
 
 
a67e3e7
f4e5c3c
 
a67e3e7
f4e5c3c
 
023b5fd
f4e5c3c
 
 
9a94dba
f4e5c3c
56f13c1
f4e5c3c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#importing the necessary libraries
import re
import nltk
import torch
import numpy as np
import gradio as gr

from nltk.tokenize import sent_tokenize
from transformers import pipeline
# Fetch the NLTK "punkt" resource at import time.
# NOTE(review): presumably this is the data sent_tokenize needs - confirm against the NLTK docs.
nltk.download("punkt")

    
#initializing the model and tokenizer
# Both are created once at import time from the "facebook/bart-large-cnn"
# checkpoint and are used as module-level globals by final_summary.
from transformers import BartTokenizer, BartForConditionalGeneration

model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")


# Defining a function to read in the text file
def read_in_text(url):
  """Read a text file and return its whole content as one string.

  Args:
    url: Location of the file to read. Despite the parameter name, the
      value is handed directly to the built-in ``open``.

  Returns:
    The complete content of the file as a single ``str``.

  Raises:
    OSError: If the file cannot be opened (e.g. it does not exist).
    UnicodeDecodeError: If the file content is not valid UTF-8.
  """
  # Decode explicitly as UTF-8 so the result does not depend on the
  # platform's default locale encoding.
  with open(url, "r", encoding="utf-8") as file:
    return file.read()

      
#Defining a function to get the summary of the article
def final_summary(file):
  """Summarize a text file and return the summary as a bullet list.

  The file's text is split into sentences, the sentences are grouped into
  batches of 10, and each batch is summarized with the module-level
  ``model`` and ``tokenizer``. The batch summaries are joined, split into
  sentences again, and each sentence becomes one "* "-prefixed line.

  Args:
    file: The uploaded file. Either an object whose ``name`` attribute is
      the path of the file on disk, or a plain path string.

  Returns:
    A newline-separated string of "* "-prefixed summary sentences. The
    string is empty when the file contains no sentences.
  """
  # Use `file.name` when present (the original contract); otherwise treat
  # `file` itself as the path.
  path = getattr(file, "name", file)

  #reading in the text and tokenizing it into sentences
  sentences = sent_tokenize(read_in_text(path))
  batch_summaries = []

  #looping through the sentences in batches of 10 and summarizing them
  for start in range(0, len(sentences), 10):
    batch_text = " ".join(sentences[start:start + 10])
    # truncation=True is what makes max_length take effect; without it an
    # over-long batch is passed to the model untruncated.
    inputs = tokenizer(
        batch_text,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    )
    summary_ids = model.generate(inputs["input_ids"])
    decoded = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # A single sequence was passed in, so only the first result is used.
    batch_summaries.append(decoded[0])

  #joining all the summary output together and turning each sentence
  #into a bullet; " ." is collapsed to "." to tidy the spacing
  summary = " ".join(batch_summaries)
  bullets = [
      "* " + sentence.strip().replace(" .", ".")
      for sentence in sent_tokenize(summary)
  ]
  return "\n".join(bullets)
  
#creating an interface for the article summarizer using gradio
# final_summary receives the uploaded file, and the string it returns is
# shown in the "Summary" textbox.
demo = gr.Interface(
    fn=final_summary,
    inputs=[gr.File(label="Drop your .txt file here")],
    title="ARTICLE SUMMARIZER",
    outputs=[gr.Textbox(label="Summary")],
    theme="darkhuggingface",
)
                                          
#launching the app
if __name__ == "__main__":
    # Start the Gradio app only when this file is run as a script,
    # not when it is imported as a module.
    demo.launch(debug=True)