import os
from typing import Optional, Union

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFium2Loader
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI


class PDFQuery:
    """Answer questions about a PDF using OpenAI models and a Chroma retriever.

    Call :meth:`ingest` with a PDF path first; afterwards :meth:`ask` retrieves
    the chunks relevant to a question and runs a "stuff" question-answering
    chain over them.
    """

    def __init__(self, openai_api_key: Optional[str] = None) -> None:
        """Create the embedding model, text splitter and chat model.

        Args:
            openai_api_key: Optional OpenAI API key passed to the OpenAI
                clients. When omitted, the clients are constructed without an
                explicit key, so it must be available through the
                ``OPENAI_API_KEY`` environment variable.
        """
        # SECURITY: a literal API key used to be written into os.environ here.
        # Secrets must never live in source control; the key that was
        # committed should be considered leaked and revoked.
        credentials = {"openai_api_key": openai_api_key} if openai_api_key else {}

        self.embeddings = OpenAIEmbeddings(**credentials)
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=200,
        )
        self.llm = ChatOpenAI(temperature=0, **credentials)
        # Both stay None until ingest() has processed a document.
        self.chain = None
        self.db = None

    def ask(self, question: str) -> str:
        """Answer ``question`` from the ingested document.

        Args:
            question: The question to answer.

        Returns:
            The chain's answer, or a prompt to add a document when nothing has
            been ingested yet.
        """
        if self.chain is None or self.db is None:
            return "Please, add a document."

        docs = self.db.get_relevant_documents(question)
        return self.chain.run(input_documents=docs, question=question)

    def ingest(self, file_path: Union[str, os.PathLike]) -> None:
        """Load a PDF, split it into chunks, and index it for retrieval.

        Args:
            file_path: Location of the PDF file to load.
        """
        # Normalise path-like objects to the plain string form of the path.
        loader = PyPDFium2Loader(os.fspath(file_path))
        chunks = self.text_splitter.split_documents(loader.load())

        # Build both collaborators before touching instance state so that a
        # failure part-way through does not leave a retriever without a chain.
        retriever = Chroma.from_documents(chunks, self.embeddings).as_retriever()
        # Reuse the chat model configured in __init__ rather than creating a
        # second, identically configured instance.
        chain = load_qa_chain(self.llm, chain_type="stuff")

        self.db = retriever
        self.chain = chain