HeRksTAn committed on
Commit
073e458
1 Parent(s): 72a395f
Files changed (4) hide show
  1. Dockerfile +11 -0
  2. README.md +0 -10
  3. app.py +122 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for running the Chainlit app on port 7860.
FROM python:3.9

# Create and switch to a non-root user with UID 1000.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy only the dependency list first so the pip layer stays cached until
# requirements.txt changes. COPY does not expand "~", so the destination is
# spelled with $HOME instead.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install -r requirements.txt

# Copy the application source last, owned by the runtime user so the app can
# write inside its own working directory.
COPY --chown=user . $HOME/app

CMD ["chainlit", "run", "app.py", "--port", "7860"]
README.md CHANGED
@@ -1,10 +0,0 @@
1
- ---
2
- title: Midterm Project
3
- emoji: 🏢
4
- colorFrom: pink
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chainlit as cl
2
+ from chainlit.playground.providers import ChatOpenAI
3
+ from dotenv import load_dotenv
4
+ from langchain_community.document_loaders import PyMuPDFLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ import tiktoken
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from operator import itemgetter
9
+ from langchain_core.runnables import RunnablePassthrough
10
+ from langchain_core.output_parsers import StrOutputParser
11
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
12
+ from langchain_community.vectorstores import FAISS
13
+ from langchain_openai import OpenAIEmbeddings
14
+ from langchain.retrievers import MultiQueryRetriever
15
+ from langchain.chains.combine_documents import create_stuff_documents_chain
16
+ from langchain.chains import create_retrieval_chain
17
+
18
+ from langchain.prompts import ChatPromptTemplate
19
+ from langchain.schema import StrOutputParser
20
+ from langchain.schema.runnable import Runnable
21
+ from langchain.schema.runnable.config import RunnableConfig
22
+ from langchain.retrievers import MultiQueryRetriever
23
+ from langchain.chains.combine_documents import create_stuff_documents_chain
24
+ from langchain import hub
25
+
26
# Prompt sent to the chat model. It has two placeholders: {context} (used
# twice: once inside the refusal sentence, once in the Context section) and
# {question}.
template = (
    "\n"
    "you can only answer questions related to what's in the context. If it's not in the context, then you would reply with\n"
    "'Sorry I have no answer to your particular question. I can only answer things regarding: {context}'\n"
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Question:\n"
    "{question}\n"
)

# Default chat-completion settings. Nothing in the active code reads this
# dict; it is only referenced from commented-out lines.
init_settings = dict(
    model="gpt-3.5-turbo",
    temperature=0,
    max_tokens=500,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)
45
+
46
# Load variables from a local .env file into the process environment first,
# so that anything constructed below can see them.
# NOTE(review): presumably this is where OPENAI_API_KEY comes from; the
# embeddings client is expected to read it from the environment when it is
# constructed, which is why load_dotenv() must run before it — confirm.
load_dotenv()

# Shared embedding model used to index the document chunks.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
50
+
51
def tiktoken_len(text):
    """Return the number of tokens in *text* under the gpt-3.5-turbo encoding."""
    encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
    return len(encoder.encode(text))
56
+
57
+
58
# Retriever over the PDF, built on first use and then shared by all sessions
# so the document is not re-loaded and re-embedded for every new chat.
_cached_retriever = None


def _get_retriever():
    """Return the vector-store retriever, building the FAISS index once."""
    global _cached_retriever
    if _cached_retriever is None:
        # NOTE(review): this path is relative to the process working
        # directory, not to this file — confirm the PDF exists there in the
        # deployed image.
        pages = PyMuPDFLoader('../docs/nvidia-document.pdf').load()

        # Chunk size is measured in tokens via tiktoken_len, not characters.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1700,
            chunk_overlap=0,
            length_function=tiktoken_len,
        )
        chunks = splitter.split_documents(pages)

        store = FAISS.from_documents(chunks, embedding=embeddings)
        _cached_retriever = store.as_retriever()
    return _cached_retriever


@cl.on_chat_start
async def main():
    """Build the retrieval chain and store it in the user session.

    The chain takes ``{"question": str}`` and produces a dict with two keys:
    ``"response"`` (the chat model's output for the filled-in prompt) and
    ``"context"`` (whatever the retriever returned for the question). It is
    saved under the session key ``"runnable"``.
    """
    model = ChatOpenAI(streaming=True)
    prompt = ChatPromptTemplate.from_template(template)

    # Loading, splitting and embedding are synchronous; run them off the
    # event loop so other sessions are not stalled during the first build.
    retriever = await cl.make_async(_get_retriever)()

    # NOTE(review): the plain vector-store retriever is used here. If query
    # expansion is wanted, MultiQueryRetriever.from_llm(retriever=retriever,
    # llm=model) could be substituted — confirm intent.
    runnable = (
        # Step 1: look up context for the question, keep the question as-is.
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        # Step 2: answer from the prompt, and pass the context through.
        | {"response": prompt | model, "context": itemgetter("context")}
    )

    cl.user_session.set("runnable", runnable)
92
+
93
+
94
+
95
@cl.on_message
async def on_message(message: cl.Message):
    """Answer one user message with the chain stored in the session.

    Args:
        message: Incoming Chainlit message; its ``content`` is passed to the
            chain as the question.
    """
    runnable = cl.user_session.get("runnable")
    if runnable is None:
        # Nothing is stored under "runnable" for this session, so there is
        # no chain to call; tell the user instead of failing on None.
        await cl.Message(
            content="The assistant is not ready yet. Please start a new chat."
        ).send()
        return

    result = await runnable.ainvoke({"question": message.content})

    # Only the text of the model's reply is sent back; result["context"] is
    # not shown to the user.
    await cl.Message(content=result["response"].content).send()
116
+
117
+
118
+
119
+
120
+
121
+
122
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
chainlit==0.7.700
cohere==4.37
openai==1.3.5
tiktoken==0.5.1
python-dotenv==1.0.0
# Exact pins use "=="; a single "=" is not a valid version specifier and makes
# `pip install -r requirements.txt` fail.
# NOTE(review): verify that this exact version is published on PyPI.
langchain-community==0.1.0
# NOTE(review): app.py also imports langchain, langchain_openai, FAISS and
# PyMuPDFLoader, none of which is listed here — TODO confirm and add
# compatible pins.