from pathlib import Path

from langchain.chains.summarize import load_summarize_chain
from langchain_cohere.llms import Cohere
from langchain_community.document_loaders import (
    Docx2txtLoader,
    PyPDFLoader,
    UnstructuredPowerPointLoader,
)

# Maps a lower-cased file suffix to the loader class that can read it.
_LOADERS_BY_EXTENSION = {
    ".pdf": PyPDFLoader,
    ".docx": Docx2txtLoader,
    ".pptx": UnstructuredPowerPointLoader,
}


def summarize_file(method: str, file: "str | Path") -> str:
    """Summarize a PDF, DOCX or PPTX file with a Cohere LLM.

    The loader is chosen from the file's extension (case-insensitive), the
    document is loaded and split into chunks, and the chunks are passed
    through a LangChain summarization chain.

    Args:
        method: Value forwarded as ``chain_type`` to ``load_summarize_chain``
            (per the LangChain docs, e.g. ``"stuff"``, ``"map_reduce"`` or
            ``"refine"``).
        file: Path of the document to summarize.

    Returns:
        The summary text produced by the chain.

    Raises:
        ValueError: If the file extension is not ``.pdf``, ``.docx`` or
            ``.pptx``.
    """
    ext = Path(file).suffix.lower()

    # Validate the extension before constructing the LLM client, so that an
    # unsupported file fails fast without any client setup.
    loader_cls = _LOADERS_BY_EXTENSION.get(ext)
    if loader_cls is None:
        raise ValueError(f"Unsupported file extension: {ext}")

    docs = loader_cls(file).load_and_split()

    # temperature=0 is passed to the Cohere LLM, as in the original code.
    llm = Cohere(temperature=0)
    summarization_chain = load_summarize_chain(llm=llm, chain_type=method)

    # `Chain.run` is deprecated; `invoke` takes the documents under the
    # chain's "input_documents" key and returns the summary under
    # "output_text".
    result = summarization_chain.invoke({"input_documents": docs})
    return result["output_text"]


# NOTE: the batch variant below is commented out (disabled) in this module.
#
# def summarize_files(method, files):
#     # Initialize the LLM
#     llm = Cohere(temperature=0)
#     summaries = []
#     # Load and read each file
#     for file in files:
#         ext = Path(file).suffix.lower()
#         if ext == '.pdf':
#             loader = PyPDFLoader(file)
#         elif ext == '.docx':
#             loader = Docx2txtLoader(file)
#         elif ext == '.pptx':
#             loader = UnstructuredPowerPointLoader(file)
#         else:
#             raise ValueError(f"Unsupported file extension: {ext}")
#         docs = loader.load_and_split()
#         # Initialize a summarization chain with the specified method
#         summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
#         summary = summarization_chain.run(docs)
#         summaries.append(summary)
#     return summaries