import chainlit as cl
import tiktoken
from dotenv import load_dotenv
from operator import itemgetter

from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.document_loaders import PyMuPDFLoader
from langchain.prompts import ChatPromptTemplate
from langchain.retrievers import MultiQueryRetriever
from langchain.schema.runnable.config import RunnableConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Load environment variables (e.g. OPENAI_API_KEY) before any OpenAI client is created.
load_dotenv()

template = """You can only answer questions related to what's in the context. If it's not in the context, reply with 'Sorry, I have no answer to your particular question. I can only answer things regarding: {context}'

Context: {context}

Question: {question}
"""

init_settings = {
    "model": "gpt-3.5-turbo",
    "temperature": 0,
    "max_tokens": 500,
    "top_p": 1,
    "frequency_penalty": 0,
    "presence_penalty": 0,
}

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")


def tiktoken_len(text):
    """Count tokens as gpt-3.5-turbo would, so chunk sizes map to model tokens."""
    tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text)
    return len(tokens)


@cl.on_chat_start
async def main():
    model = ChatOpenAI(streaming=True)
    prompt = ChatPromptTemplate.from_template(template)

    # Load the PDF and split it into token-bounded chunks.
    nvidia_doc = PyMuPDFLoader("../docs/nvidia-document.pdf")
    data = nvidia_doc.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1700,
        chunk_overlap=0,
        length_function=tiktoken_len,
    )
    nvidia_doc_chunks = text_splitter.split_documents(data)

    # Index the chunks in FAISS and expose them as a retriever.
    vector_store = FAISS.from_documents(nvidia_doc_chunks, embedding=embeddings)
    retriever = vector_store.as_retriever()

    # Multi-query retriever, only used by the commented-out retrieval chain below.
    advanced_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=model)

    # LCEL chain: retrieve context for the question, pass both to the prompt/model,
    # and return the model response alongside the retrieved context.
    runnable = (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
        }
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | model, "context": itemgetter("context")}
    )

    # Alternative: a prebuilt retrieval chain using the hub prompt and the
    # multi-query retriever.
    # retrieval_qa_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
    # document_chain = create_stuff_documents_chain(model, retrieval_qa_prompt)
    # runnable = create_retrieval_chain(advanced_retriever, document_chain)

    # cl.user_session.set("settings", init_settings)
    # cl.user_session.set("nvidia_doc", data)
    cl.user_session.set("runnable", runnable)


@cl.on_message
async def on_message(message: cl.Message):
    # settings = cl.user_session.get("settings")
    # nvidia_doc = cl.user_session.get("nvidia_doc")
    runnable = cl.user_session.get("runnable")

    # Streaming alternative:
    # msg = cl.Message(content="")
    # async for chunk in runnable.astream(
    #     {"question": message.content},
    #     config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    # ):
    #     await msg.stream_token(chunk, True)
    # await msg.send()

    inputs = {"question": message.content}
    result = await runnable.ainvoke(inputs)
    msg = cl.Message(content=result["response"].content)
    await msg.send()
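# Usage note (assumptions: this file is saved as app.py and a .env file defining
# OPENAI_API_KEY sits alongside it):
#   chainlit run app.py -w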