File size: 1,802 Bytes
47c8018
 
 
 
 
 
59bc9a2
47c8018
 
 
59bc9a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47c8018
59bc9a2
 
 
 
 
 
 
 
 
47c8018
59bc9a2
 
 
 
 
47c8018
59bc9a2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader
from langchain_community.document_loaders import UnstructuredPowerPointLoader
from langchain_cohere.llms import Cohere
from langchain.chains.summarize import load_summarize_chain
from pathlib import Path


def _loader_for(file):
    """Return the document loader that matches the extension of *file*.

    The extension is compared case-insensitively. ``.pdf``, ``.docx`` and
    ``.pptx`` are supported.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    ext = Path(file).suffix.lower()
    if ext == '.pdf':
        return PyPDFLoader(file)
    if ext == '.docx':
        return Docx2txtLoader(file)
    if ext == '.pptx':
        return UnstructuredPowerPointLoader(file)
    raise ValueError(f"Unsupported file extension: {ext}")


def summarize_files(method, files):
    """Summarize one or more documents with a Cohere-backed summarize chain.

    Args:
        method: Value forwarded as ``chain_type`` to ``load_summarize_chain``.
        files: A single path (``str`` or ``pathlib.Path``) or an iterable of
            such paths. Supported extensions are ``.pdf``, ``.docx`` and
            ``.pptx``.

    Returns:
        The result of running the summarization chain over the documents
        loaded from all given files (a single combined summary).

    Raises:
        ValueError: If no file is given or a file has an unsupported
            extension.
    """
    # The body previously read an undefined name ``file`` and failed with
    # NameError on every call; normalise ``files`` into a list of paths.
    if files is None:
        paths = []
    elif isinstance(files, (str, Path)):
        paths = [files]
    else:
        paths = list(files)

    if not paths:
        raise ValueError("No files were provided to summarize")

    # Pick loaders up front so unsupported input is rejected before the LLM
    # client is created.
    loaders = [_loader_for(path) for path in paths]

    # Initialize the LLM
    llm = Cohere(temperature=0)

    docs = []
    for loader in loaders:
        docs.extend(loader.load_and_split())

    # Initialize a summarization chain with the specified method
    summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
    return summarization_chain.run(docs)





# def summarize_files(method, files):
#     # Initialize the LLM
#     llm = Cohere(temperature=0)
#     summaries = []
#     # Load and read each file
#     for file in files:
        
#         ext = Path(file).suffix.lower()
#         if ext == '.pdf':
#             loader = PyPDFLoader(file)
#         elif ext == '.docx':
#             loader = Docx2txtLoader(file)
#         elif ext == '.pptx':
#             loader = UnstructuredPowerPointLoader(file)
#         else:
#             raise ValueError(f"Unsupported file extension: {ext}")
        
#         docs = loader.load_and_split()
#         # Initialize a summarization chain with the specified method
#         summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
#         summary = summarization_chain.run(docs)
#         summaries.append(summary)    
    
#     return summaries