bisoye committed on
Commit
132646e
1 Parent(s): fe4489c

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +43 -0
  2. helper_fns.py +30 -0
  3. summarizer.py +33 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from helper_fns import process_files, get_summarization_method
3
+ from summarizer import summarize_files
4
+
5
# Two-column summarization UI: upload + options on the left, the generated
# summary on the right. Event listeners are registered inside the Blocks
# context so Gradio attaches them to this app.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            files = gr.UploadButton(
                label='Upload Files For Summarization',
                file_count='multiple',
                # Explicit extensions must carry a leading dot for Gradio to
                # recognise them as file-type filters.
                file_types=['.pdf', '.docx', '.pptx'],
            )
            # Both controls start disabled; process_files enables them once
            # an upload has completed.
            summarization_method_radio = gr.Radio(
                choices=['map_reduce', 'stuff', 'refine'],
                value='map_reduce',
                label='Select Summarization Method',
                interactive=False,
            )
            generate_summaries_button = gr.Button(
                value='Generate Summaries',
                interactive=False,
                elem_id='summary_button',
            )

            files.upload(
                fn=process_files,
                inputs=None,
                outputs=[generate_summaries_button, summarization_method_radio],
            )
            summarization_method_radio.input(
                fn=get_summarization_method,
                inputs=summarization_method_radio,
            )

        with gr.Column():
            summary_text = gr.Textbox(
                label='Summarized Text: ',
                interactive=False,
            )

            generate_summaries_button.click(
                fn=summarize_files,
                inputs=[summarization_method_radio, files],
                outputs=[summary_text],
            )

# Start the server only when executed as a script, so importing this module
# (e.g. for tests) does not block on a running app.
if __name__ == '__main__':
    demo.launch()
39
+
40
+
41
+
42
+
43
+
helper_fns.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
4
def process_files():
    """Build the component updates emitted after an upload finishes.

    Takes no arguments. Returns a 2-tuple of ``gr.update`` payloads, each
    setting ``interactive=True``: the first tagged with the
    ``'summary_button'`` element id, the second with
    ``'summarization_method'``.
    """
    button_update = gr.update(interactive=True, elem_id='summary_button')
    method_update = gr.update(interactive=True, elem_id='summarization_method')
    return (button_update, method_update)
9
+
10
+
11
+
12
def get_summarization_method(option: str) -> str:
    """Return the selected summarization method unchanged.

    Identity callback: whatever value is passed in as *option* is handed
    straight back to the caller.
    """
    return option
14
+
15
+
16
+
17
+
18
def text_to_audio(text, model_name="facebook/fastspeech2-en-ljspeech"):
    """Synthesize *text* to speech and save it as ``output.wav``.

    Args:
        text: The text to speak.
        model_name: Hub id of the text-to-speech checkpoint handed to
            ``transformers.pipeline``.

    Returns:
        The path of the written WAV file (``"output.wav"``, relative to the
        current working directory).
    """
    # Local imports keep this block self-contained; numpy is already a
    # dependency of transformers.
    import wave

    import numpy as np

    # NOTE(review): the default checkpoint looks like a fairseq model --
    # confirm that the transformers pipeline can actually load it.
    tts_pipeline = pipeline("text-to-speech", model=model_name)

    # The pipeline returns a dict with the waveform under "audio" and its
    # rate under "sampling_rate"; there is no "wav" entry holding bytes.
    result = tts_pipeline(text)

    # Assumes a float waveform in [-1, 1], laid out as (channels, samples)
    # or (samples,) -- TODO confirm for the chosen model.
    samples = np.atleast_1d(np.squeeze(np.asarray(result["audio"], dtype=np.float32)))
    if samples.ndim > 1:
        # wave expects interleaved frames, i.e. (samples, channels) order.
        samples = samples.T
    n_channels = 1 if samples.ndim == 1 else samples.shape[1]

    # Convert to little-endian 16-bit PCM, clipping to avoid wrap-around.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

    # Write a real WAV container (header + frames) rather than raw bytes.
    audio_path = "output.wav"
    with wave.open(audio_path, "wb") as wav_file:
        wav_file.setnchannels(n_channels)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(result["sampling_rate"]))
        wav_file.writeframes(pcm.tobytes())

    return audio_path
summarizer.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import PyPDFLoader
2
+ from langchain_community.document_loaders import Docx2txtLoader
3
+ from langchain_community.document_loaders import UnstructuredPowerPointLoader
4
+ from langchain_cohere.llms import Cohere
5
+ from langchain.chains.summarize import load_summarize_chain
6
+ from pathlib import Path
7
+ import os
8
+
9
def _resolve_upload_paths(files):
    """Normalize *files* into a list of filesystem path strings."""
    if isinstance(files, (str, os.PathLike)):
        root = os.fspath(files)
        if os.path.isdir(root):
            # Backward compatible: a directory means "every entry inside it".
            return [os.path.join(root, entry) for entry in os.listdir(root)]
        return [root]

    paths = []
    for item in files:
        if isinstance(item, (str, os.PathLike)):
            paths.append(os.fspath(item))
        else:
            # Upload wrappers (e.g. temp-file objects) expose the on-disk
            # location through their ``name`` attribute.
            paths.append(item.name)
    return paths


def _make_loader(file_path):
    """Return the document loader matching the extension of *file_path*."""
    ext = Path(file_path).suffix.lower()
    if ext == '.pdf':
        return PyPDFLoader(file_path)
    if ext == '.docx':
        return Docx2txtLoader(file_path)
    if ext == '.pptx':
        return UnstructuredPowerPointLoader(file_path)
    raise ValueError(f"Unsupported file extension: {ext}")


def summarize_files(method, files):
    """Summarize each uploaded document with a Cohere-backed chain.

    Args:
        method: Chain type passed to ``load_summarize_chain``
            (e.g. ``'map_reduce'``, ``'stuff'`` or ``'refine'``).
        files: The uploaded documents: a list of paths or upload objects
            exposing ``.name``, a single file path, or a directory path
            whose entries are all summarized.

    Returns:
        A list with one summary per input document, in input order. Empty
        when *files* is empty or ``None``.

    Raises:
        ValueError: If any file is not a ``.pdf``, ``.docx`` or ``.pptx``.
    """
    if not files:
        return []

    # Build every loader up front so an unsupported file fails fast, before
    # any LLM call has been paid for.
    loaders = [_make_loader(path) for path in _resolve_upload_paths(files)]

    # The LLM and the chain do not depend on the file: create them once.
    llm = Cohere(temperature=0)
    summarization_chain = load_summarize_chain(llm=llm, chain_type=method)

    summaries = []
    for loader in loaders:
        docs = loader.load_and_split()
        summaries.append(summarization_chain.run(docs))
    return summaries