nagaki and sophiamyang committed
Commit 10434d6 (0 parents)

Duplicate from sophiamyang/Panel_PDF_QA


Co-authored-by: Sophia Yang <[email protected]>

Files changed (5)
  1. .gitattributes +34 -0
  2. Dockerfile +16 -0
  3. LangChain_QA_Panel_App.ipynb +255 -0
  4. README.md +11 -0
  5. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+ RUN python3 -m pip install --no-cache-dir --upgrade pip
+ RUN python3 -m pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+
+ CMD ["panel", "serve", "/code/LangChain_QA_Panel_App.ipynb", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "sophiamyang-panel-pdf-qa.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
+
+ RUN mkdir /.cache
+ RUN chmod 777 /.cache
+ RUN mkdir .chroma
+ RUN chmod 777 .chroma
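
The CMD above serves the notebook through Panel's CLI on port 7860 and allowlists the Space's public websocket origin. For reference only, here is a minimal sketch of roughly the same call through Panel's Python API; the placeholder `app` object and the script itself are illustrative and not part of this commit:

import panel as pn

# Hypothetical stand-in for the real app; in the Space, `panel serve` runs the notebook itself.
app = pn.Column("# PDF QA placeholder")

pn.serve(
    app,
    address="0.0.0.0",
    port=7860,
    websocket_origin=["sophiamyang-panel-pdf-qa.hf.space", "0.0.0.0:7860"],
    show=False,
)

The trailing RUN lines make /.cache and .chroma world-writable, presumably because the notebook saves the uploaded PDF to /.cache/temp.pdf and Chroma writes its index under .chroma at runtime.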
LangChain_QA_Panel_App.ipynb ADDED
@@ -0,0 +1,255 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "04815d1b-44ee-4bd3-878e-fa0c3bf9fa7f",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# LangChain QA Panel App\n",
+ "\n",
+ "This notebook shows how to make this app:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "#!pip install langchain openai chromadb tiktoken pypdf panel\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import os \n",
+ "from langchain.chains import RetrievalQA\n",
+ "from langchain.llms import OpenAI\n",
+ "from langchain.document_loaders import TextLoader\n",
+ "from langchain.document_loaders import PyPDFLoader\n",
+ "from langchain.indexes import VectorstoreIndexCreator\n",
+ "from langchain.text_splitter import CharacterTextSplitter\n",
+ "from langchain.embeddings import OpenAIEmbeddings\n",
+ "from langchain.vectorstores import Chroma\n",
+ "import panel as pn\n",
+ "import tempfile\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
+ "pn.state.template.param.update(\n",
+ " main_max_width=\"690px\",\n",
+ " header_background=\"#F08080\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "file_input = pn.widgets.FileInput(width=300)\n",
+ "\n",
+ "openaikey = pn.widgets.PasswordInput(\n",
+ " value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n",
+ ")\n",
+ "prompt = pn.widgets.TextEditor(\n",
+ " value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
+ ")\n",
+ "run_button = pn.widgets.Button(name=\"Run!\")\n",
+ "\n",
+ "select_k = pn.widgets.IntSlider(\n",
+ " name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
+ ")\n",
+ "select_chain_type = pn.widgets.RadioButtonGroup(\n",
+ " name='Chain type', \n",
+ " options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n",
+ ")\n",
+ "\n",
+ "widgets = pn.Row(\n",
+ " pn.Column(prompt, run_button, margin=5),\n",
+ " pn.Card(\n",
+ " \"Chain type:\",\n",
+ " pn.Column(select_chain_type, select_k),\n",
+ " title=\"Advanced settings\", margin=10\n",
+ " ), width=600\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9b83cc06-3401-498f-8f84-8a98370f3121",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def qa(file, query, chain_type, k):\n",
+ " # load document\n",
+ " loader = PyPDFLoader(file)\n",
+ " documents = loader.load()\n",
+ " # split the documents into chunks\n",
+ " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
+ " texts = text_splitter.split_documents(documents)\n",
+ " # select which embeddings we want to use\n",
+ " embeddings = OpenAIEmbeddings()\n",
+ " # create the vectorestore to use as the index\n",
+ " db = Chroma.from_documents(texts, embeddings)\n",
+ " # expose this index in a retriever interface\n",
+ " retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
+ " # create a chain to answer questions \n",
+ " qa = RetrievalQA.from_chain_type(\n",
+ " llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
+ " result = qa({\"query\": query})\n",
+ " print(result['result'])\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2722f43b-daf6-4d17-a842-41203ae9b140",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# result = qa(\"example.pdf\", \"what is the total number of AI publications?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "convos = [] # store all panel objects in a list\n",
+ "\n",
+ "def qa_result(_):\n",
+ " os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n",
+ " \n",
+ " # save pdf file to a temp file \n",
+ " if file_input.value is not None:\n",
+ " file_input.save(\"/.cache/temp.pdf\")\n",
+ " \n",
+ " prompt_text = prompt.value\n",
+ " if prompt_text:\n",
+ " result = qa(file=\"/.cache/temp.pdf\", query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n",
+ " convos.extend([\n",
+ " pn.Row(\n",
+ " pn.panel(\"\\U0001F60A\", width=10),\n",
+ " prompt_text,\n",
+ " width=600\n",
+ " ),\n",
+ " pn.Row(\n",
+ " pn.panel(\"\\U0001F916\", width=10),\n",
+ " pn.Column(\n",
+ " result[\"result\"],\n",
+ " \"Relevant source text:\",\n",
+ " pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
+ " )\n",
+ " )\n",
+ " ])\n",
+ " #return convos\n",
+ " return pn.Column(*convos, margin=15, width=575, min_height=400)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "qa_interactive = pn.panel(\n",
+ " pn.bind(qa_result, run_button),\n",
+ " loading_indicator=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "228e2b42-b1ed-43af-b923-031a70241ab0",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "output = pn.WidgetBox('*Output will show up here:*', qa_interactive, width=630, scroll=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# layout\n",
+ "pn.Column(\n",
+ " pn.pane.Markdown(\"\"\"\n",
+ " ## \\U0001F60A! Question Answering with your PDF file\n",
+ " \n",
+ " 1) Upload a PDF. 2) Enter OpenAI API key. This costs $. Set up billing at [OpenAI](https://platform.openai.com/account). 3) Type a question and click \"Run\".\n",
+ " \n",
+ " \"\"\"),\n",
+ " pn.Row(file_input,openaikey),\n",
+ " output,\n",
+ " widgets\n",
+ "\n",
+ ").servable()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
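
The heart of the notebook is the qa() helper: it loads the PDF with PyPDFLoader, splits it with CharacterTextSplitter, embeds the chunks into a Chroma store, and answers the question through a RetrievalQA chain, returning both the answer and the source chunks. A minimal sketch of calling it outside the Panel UI, assuming the notebook's definitions are already in scope; the PDF path, question, and key below are placeholders:

import os

os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder; supply your own key

# `qa` is the helper defined in LangChain_QA_Panel_App.ipynb above.
result = qa(
    file="example.pdf",                    # hypothetical local PDF
    query="What is this document about?",  # placeholder question
    chain_type="stuff",                    # or map_reduce / refine / map_rerank
    k=2,                                   # number of retrieved chunks
)
print(result["result"])
for doc in result["source_documents"]:
    print(doc.page_content[:200])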
README.md ADDED
@@ -0,0 +1,11 @@
+ ---
+ title: Panel PDF QA
+ emoji: 📈
+ colorFrom: pink
+ colorTo: red
+ sdk: docker
+ pinned: false
+ duplicated_from: sophiamyang/Panel_PDF_QA
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ langchain
+ openai
+ chromadb
+ pypdf
+ tiktoken
+ panel
+ notebook