from langchain_openai import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
import streamlit as st
import os
from io import BytesIO
import pdfplumber


class InMemoryPDFLoader(BaseLoader):
    """Load a PDF from raw bytes without writing it to disk."""

    def __init__(self, file_bytes: bytes):
        self.file_bytes = file_bytes

    def load(self):
        pdf_stream = BytesIO(self.file_bytes)
        with pdfplumber.open(pdf_stream) as pdf:
            text = ""
            for page in pdf.pages:
                # extract_text() returns None for pages with no extractable text
                text += page.extract_text() or ""
        return [Document(page_content=text)]


# Read the OpenAI API key from the environment
open_ai_key = os.getenv("OPENAI_API_KEY")

llm = ChatOpenAI(api_key=open_ai_key)

template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

pdf_file = st.file_uploader("Upload your PDF", type="pdf")
question = st.chat_input("Ask your question")

if pdf_file is not None:
    try:
        pdf_bytes = pdf_file.read()
        loader = InMemoryPDFLoader(file_bytes=pdf_bytes)
        pdf_data = loader.load()

        # Split the extracted text into overlapping chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = text_splitter.split_documents(pdf_data)

        # Create a Chroma vector store from the chunks
        embeddings = HuggingFaceEmbeddings(model_name="embaas/sentence-transformers-multilingual-e5-base")
        db = Chroma.from_documents(docs, embeddings)

        # Initialize message history for the conversation
        message_history = ChatMessageHistory()

        # Memory that carries conversational context between turns
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            chat_memory=message_history,
            return_messages=True,
        )

        # Create a retrieval chain backed by the Chroma vector store
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(),
            memory=memory,
            return_source_documents=False,
            combine_docs_chain_kwargs={"prompt": prompt},
        )

        if question:
            with st.chat_message("user"):
                st.markdown(question)
            with st.chat_message("assistant"):
                res = chain.invoke({"question": question})
                answer = res["answer"]
                st.write(answer)
    except Exception as e:
        st.error(f"An error occurred: {e}")
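Assuming the script is saved as `app.py` (the original does not name the file), it can be launched with `streamlit run app.py` after exporting `OPENAI_API_KEY` in the shell. One design note: because Streamlit reruns the whole script on every interaction, the embeddings, vector store, and conversation memory are rebuilt for each question; wrapping their construction in `st.cache_resource` would avoid that repeated work.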