Spaces:
Runtime error
Runtime error
init commit
Browse files- .github/sync_to_huggingface_hub.yml +20 -0
- README.md +25 -2
- app.py +137 -0
- data/.gitkeep +0 -0
- img/OpenAI_GPT3.5-Turbo.jpg +0 -0
- models/.gitkeep +0 -0
- notebooks/.gitkeep +0 -0
- requirements.txt +8 -0
- requirements_local.txt +10 -0
- runtime.txt +1 -0
- src/.gitkeep +0 -0
.github/sync_to_huggingface_hub.yml
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Mirror this repository to the HuggingFace Space on every push to main.
name: Sync to HuggingFace Space
on:
  push:
    branches: [main]

  # to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Full history + LFS objects are required for a clean force-push mirror.
          fetch-depth: 0
          lfs: true
      - name: Push to hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        # NOTE(fix): the password must be the *expansion* of the token
        # ($HF_TOKEN), not the literal string "HF_TOKEN" — otherwise the
        # push authenticates with a bogus credential and fails.
        run: git push --force https://thivav:[email protected]/spaces/thivav/chat_with_pdf_using_gpt main
|
README.md
CHANGED
@@ -1,2 +1,25 @@
|
|
1 |
-
|
2 |
-
Chat
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Chat With Pdf Using Gpt
|
3 |
+
emoji: π
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: pink
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.31.1
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
![OpenAI GPT 3.5](/img/OpenAI_GPT3.5-Turbo.jpg)
|
13 |
+
|
14 |
+
# Chat with pdf using OpenAI GPT 3.5 π’
|
15 |
+
|
16 |
+
#GPT3.5 | #OpenAIEmbeddings | #PDF | #Streamlit | #FAISS
|
17 |
+
|
18 |
+
Chat with pdf using [GPT 3.5 Turbo](https://platform.openai.com/docs/models/gpt-3-5-turbo)
|
19 |
+
|
20 |
+
- [GPT-3.5 Turbo](https://platform.openai.com/docs/models/gpt-3-5-turbo)
|
21 |
+
- [GPT-3.5 Turbo fine-tuning and API updates](https://openai.com/blog/gpt-3-5-turbo-fine-tuning-and-api-updates)
|
22 |
+
- [Streamlit Chat Message History](https://python.langchain.com/docs/integrations/memory/streamlit_chat_message_history)
|
23 |
+
- [FAISS](https://python.langchain.com/docs/integrations/vectorstores/faiss)
|
24 |
+
|
25 |
+
[Chat with PDF using OpenAI GPT 3.5 Turbo - Playground](https://huggingface.co/spaces/thivav/chat_with_pdf_using_gpt)
|
app.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import os
|
2 |
+
import os
|
3 |
+
import tempfile
|
4 |
+
|
5 |
+
import streamlit as st
|
6 |
+
from langchain.chains import ConversationalRetrievalChain
|
7 |
+
from langchain.memory import ConversationBufferMemory
|
8 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
9 |
+
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
|
10 |
+
from langchain_community.document_loaders import PyPDFLoader
|
11 |
+
from langchain_community.vectorstores import FAISS
|
12 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
13 |
+
from streamlit_extras.add_vertical_space import add_vertical_space
|
14 |
+
|
15 |
+
|
16 |
+
@st.cache_resource(ttl="1h")
def load_retriever(pdf_files):
    """Build a FAISS retriever over the uploaded PDF files.

    Cached as a Streamlit resource for one hour so repeated reruns with the
    same uploads do not re-embed the documents.

    Args:
        pdf_files: list of Streamlit ``UploadedFile`` objects (PDFs).

    Returns:
        A retriever over the embedded chunks using similarity-score-threshold
        search (threshold 0.5, top 5 results).
    """

    docs = []
    # PyPDFLoader reads from disk, so write each upload into a temporary
    # directory first. Using the context manager guarantees the directory is
    # removed when loading finishes (the original leaked it until garbage
    # collection finalized the TemporaryDirectory object).
    with tempfile.TemporaryDirectory() as temp_dir:
        for pdf_file in pdf_files:
            temp_pdf_file_path = os.path.join(temp_dir, pdf_file.name)

            with open(temp_pdf_file_path, "wb") as f:
                f.write(pdf_file.getvalue())

            loader = PyPDFLoader(temp_pdf_file_path)
            docs.extend(loader.load())

    # Token-aware splitting keeps each chunk within the embedding model's
    # context budget; the 200-token overlap preserves cross-chunk context.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1500, chunk_overlap=200
    )
    chunks = text_splitter.split_documents(docs)

    # Embed the chunks with OpenAI and index them in an in-memory FAISS store.
    embeddings = OpenAIEmbeddings()
    vector_db = FAISS.from_documents(chunks, embeddings)

    retriever = vector_db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.5, "k": 5},
    )

    return retriever
|
47 |
+
|
48 |
+
|
49 |
+
def main():
    """Streamlit entry point: chat with uploaded PDF files via GPT-3.5 Turbo.

    Renders the page and sidebar (repo link, OpenAI key input, PDF uploader),
    builds a conversational retrieval chain over the uploaded PDFs, and runs
    the chat loop. Chat history is kept in Streamlit session state so it
    survives reruns.
    """

    st.set_page_config(
        page_title="Talk to PDF using GPT 3.5",
        page_icon="π°",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    st.header("Talk to PDF files π°", divider="rainbow")
    st.subheader(
        "Enjoy :red[talking] with :green[PDF] files using :sunglasses: OpenAI GPT 3.5 Turbo"
    )

    st.sidebar.title("Talk to PDF π°")
    st.sidebar.markdown(
        "[Checkout the repository](https://github.com/ThivaV/chat_with_pdf_using_gpt)"
    )
    st.sidebar.markdown(
        """
        ### This is a LLM powered chatbot, built using:

        * [Streamlit](https://streamlit.io)
        * [LangChain](https://python.langchain.com/)
        * [OpenAI](https://platform.openai.com/docs/models)
        ___
        """
    )

    add_vertical_space(2)

    openai_key = st.sidebar.text_input(label="Enter the OpenAI key π", type="password")

    # Guard clause: nothing below can work without an API key.
    if not openai_key:
        st.info("π :red[Please enter the OpenAI key] β")
        st.stop()

    # Expose the key via the environment for libraries (e.g. OpenAIEmbeddings
    # inside load_retriever) that read OPENAI_API_KEY themselves.
    os.environ["OPENAI_API_KEY"] = openai_key

    add_vertical_space(1)

    upload_pdf_files = st.sidebar.file_uploader(
        "Upload a pdf files π€", type="pdf", accept_multiple_files=True
    )

    # Guard clause: wait until at least one PDF has been uploaded.
    if not upload_pdf_files:
        st.info("π :red[Please upload pdf files] β")
        st.stop()

    retriever = load_retriever(upload_pdf_files)

    # History is stored in Streamlit session state so it persists across reruns.
    chat_history = StreamlitChatMessageHistory()

    # init chat history memory
    memory = ConversationBufferMemory(
        memory_key="chat_history", chat_memory=chat_history, return_messages=True
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=openai_key,
        temperature=0,  # deterministic answers
        streaming=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=retriever, memory=memory, verbose=False
    )

    # Re-draw the stored chat history in the chat window after each rerun.
    for message in chat_history.messages:
        st.chat_message(message.type).write(message.content)

    if prompt := st.chat_input("Ask questions"):
        with st.chat_message("human"):
            st.markdown(prompt)

        # NOTE(fix): `Chain.run` is deprecated as of LangChain 0.1 (the
        # pinned version here is 0.1.9); `invoke` is the supported API.
        # ConversationalRetrievalChain returns a mapping whose "answer" key
        # holds the response text.
        response = chain.invoke({"question": prompt})["answer"]

        with st.chat_message("ai"):
            st.write(response)
133 |
+
|
134 |
+
|
135 |
+
if __name__ == "__main__":
    # Streamlit executes this module as a script on every rerun; delegate
    # everything to main().
    main()
|
data/.gitkeep
ADDED
File without changes
|
img/OpenAI_GPT3.5-Turbo.jpg
ADDED
models/.gitkeep
ADDED
File without changes
|
notebooks/.gitkeep
ADDED
File without changes
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pypdf==4.0.2
|
2 |
+
langchain==0.1.9
|
3 |
+
streamlit==1.31.1
|
4 |
+
streamlit-extras==0.4.0
|
5 |
+
faiss-cpu==1.7.4
|
6 |
+
openai==1.12.0
|
7 |
+
tiktoken==0.6.0
|
8 |
+
langchain-openai==0.0.8
|
requirements_local.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ipykernel
|
2 |
+
ipywidgets
|
3 |
+
pypdf==4.0.2
|
4 |
+
langchain==0.1.9
|
5 |
+
streamlit==1.31.1
|
6 |
+
streamlit-extras==0.4.0
|
7 |
+
faiss-cpu==1.7.4
|
8 |
+
openai==1.12.0
|
9 |
+
tiktoken==0.6.0
|
10 |
+
langchain-openai==0.0.8
|
runtime.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python-3.9.0
|
src/.gitkeep
ADDED
File without changes
|