bisoye committed on
Commit
47c8018
1 Parent(s): 8dd1466

Update summarizer.py

Browse files
Files changed (1) hide show
  1. summarizer.py +32 -33
summarizer.py CHANGED
@@ -1,33 +1,32 @@
1
- from langchain.document_loaders import PyPDFLoader
2
- from langchain_community.document_loaders import Docx2txtLoader
3
- from langchain_community.document_loaders import UnstructuredPowerPointLoader
4
- from langchain_cohere.llms import Cohere
5
- from langchain.chains.summarize import load_summarize_chain
6
- from pathlib import Path
7
- import os
8
-
9
- def summarize_files(method, files):
10
- # Initialize the LLM
11
- llm = Cohere(temperature=0)
12
- summaries = []
13
- # Load and read each file
14
- for file in os.listdir(files):
15
-
16
- file_path = os.path.join(files, file)
17
- ext = Path(file_path).suffix.lower()
18
- if ext == '.pdf':
19
- loader = PyPDFLoader(file_path)
20
- elif ext == '.docx':
21
- loader = Docx2txtLoader(file_path)
22
- elif ext == '.pptx':
23
- loader = UnstructuredPowerPointLoader(file_path)
24
- else:
25
- raise ValueError(f"Unsupported file extension: {ext}")
26
-
27
- docs = loader.load_and_split()
28
- # Initialize a summarization chain with the specified method
29
- summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
30
- summary = summarization_chain.run(docs)
31
- summaries.append(summary)
32
-
33
- return summaries
 
1
+ from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader
2
+ from langchain_community.document_loaders import UnstructuredPowerPointLoader
3
+ from langchain_cohere.llms import Cohere
4
+ from langchain.chains.summarize import load_summarize_chain
5
+ from pathlib import Path
6
+ import os
7
+
8
def summarize_files(method, files):
    """Summarize every supported document found in a directory.

    Args:
        method: Chain type forwarded to ``load_summarize_chain`` as its
            ``chain_type`` argument.
        files: Path of the directory whose entries are loaded and summarized.

    Returns:
        A list holding the summarization chain's output for each file,
        ordered by file name.

    Raises:
        ValueError: If a file's extension is not ``.pdf``, ``.docx`` or
            ``.pptx``.
    """
    # Map each supported (lower-cased) extension to its document loader.
    loaders_by_ext = {
        '.pdf': PyPDFLoader,
        '.docx': Docx2txtLoader,
        '.pptx': UnstructuredPowerPointLoader,
    }

    # The LLM and the chain depend only on `method`, so build them once
    # instead of once per file.
    llm = Cohere(temperature=0)
    summarization_chain = load_summarize_chain(llm=llm, chain_type=method)

    summaries = []
    # os.listdir() returns entries in arbitrary order; sort so the result
    # order is reproducible and can be matched back to the files.
    for file in sorted(os.listdir(files)):
        file_path = os.path.join(files, file)
        # Sub-directories have no suffix and would otherwise be reported
        # as an "unsupported file extension"; only regular files count.
        if not os.path.isfile(file_path):
            continue

        ext = Path(file_path).suffix.lower()
        loader_cls = loaders_by_ext.get(ext)
        if loader_cls is None:
            raise ValueError(f"Unsupported file extension: {ext}")

        docs = loader_cls(file_path).load_and_split()
        summaries.append(summarization_chain.run(docs))

    return summaries