thivav committed
Commit d498edb • 1 Parent(s): 05b08fb

init commit

.github/sync_to_huggingface_hub.yml ADDED
@@ -0,0 +1,20 @@
+ name: Sync to HuggingFace Space
+ on:
+   push:
+     branches: [main]
+
+   # to run this workflow manually from the Actions tab
+   workflow_dispatch:
+
+ jobs:
+   sync-to-hub:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v3
+         with:
+           fetch-depth: 0
+           lfs: true
+       - name: Push to hub
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: git push --force https://thivav:[email protected]/spaces/thivav/chat_with_pdf_using_gpt main
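For local verification outside GitHub Actions, the same sync can be reproduced with the `huggingface_hub` client instead of a raw `git push`. This is an illustrative sketch, not part of the commit; the `HfApi.upload_folder` call and the token handling are assumptions based on the standard `huggingface_hub` API, and the repo id mirrors the Space the workflow force-pushes to.

```python
# Hypothetical local equivalent of the workflow's push step (not part of this commit).
# Assumes `pip install huggingface_hub` and an HF_TOKEN with write access to the Space.
import os

from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])

# Upload the working tree to the same Space the workflow targets.
api.upload_folder(
    folder_path=".",
    repo_id="thivav/chat_with_pdf_using_gpt",
    repo_type="space",
    ignore_patterns=[".git/*"],
)
```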
README.md CHANGED
@@ -1,2 +1,25 @@
- # chat_with_pdf_using_gpt
- Chat with pdf using OpenAI GPT
+ ---
+ title: Chat With Pdf Using Gpt
+ emoji: 🌍
+ colorFrom: yellow
+ colorTo: pink
+ sdk: streamlit
+ sdk_version: 1.31.1
+ app_file: app.py
+ pinned: false
+ ---
+
+ ![OpenAI GPT 3.5](/img/OpenAI_GPT3.5-Turbo.jpg)
+
+ # Chat with PDF using OpenAI GPT 3.5 📒
+
+ #GPT3.5 | #OpenAIEmbeddings | #PDF | #Streamlit | #FAISS
+
+ Chat with PDF using [GPT-3.5 Turbo](https://platform.openai.com/docs/models/gpt-3-5-turbo)
+
+ - [GPT-3.5 Turbo](https://platform.openai.com/docs/models/gpt-3-5-turbo)
+ - [GPT-3.5 Turbo fine-tuning and API updates](https://openai.com/blog/gpt-3-5-turbo-fine-tuning-and-api-updates)
+ - [Streamlit Chat Message History](https://python.langchain.com/docs/integrations/memory/streamlit_chat_message_history)
+ - [FAISS](https://python.langchain.com/docs/integrations/vectorstores/faiss)
+
+ [Chat with PDF using OpenAI GPT 3.5 Turbo - Playground](https://huggingface.co/spaces/thivav/chat_with_pdf_using_gpt)
app.py ADDED
@@ -0,0 +1,137 @@
+ # import os
+ import os
+ import tempfile
+
+ import streamlit as st
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.memory import ConversationBufferMemory
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.chat_message_histories import StreamlitChatMessageHistory
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_community.vectorstores import FAISS
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+ from streamlit_extras.add_vertical_space import add_vertical_space
+
+
+ @st.cache_resource(ttl="1h")
+ def load_retriever(pdf_files):
+     """Load uploaded PDF files, split them into chunks, embed them and return a FAISS retriever."""
+
+     docs = []
+     temp_dir = tempfile.TemporaryDirectory()
+     for pdf_file in pdf_files:
+         temp_pdf_file_path = os.path.join(temp_dir.name, pdf_file.name)
+
+         with open(temp_pdf_file_path, "wb") as f:
+             f.write(pdf_file.getvalue())
+
+         loader = PyPDFLoader(temp_pdf_file_path)
+         docs.extend(loader.load())
+
+     text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+         chunk_size=1500, chunk_overlap=200
+     )
+     chunks = text_splitter.split_documents(docs)
+
+     # embeddings
+     embeddings = OpenAIEmbeddings()
+
+     vector_db = FAISS.from_documents(chunks, embeddings)
+
+     retriever = vector_db.as_retriever(
+         search_type="similarity_score_threshold",
+         search_kwargs={"score_threshold": 0.5, "k": 5},
+     )
+
+     return retriever
+
+
+ def main():
+     """Streamlit app entry point."""
+
+     st.set_page_config(
+         page_title="Talk to PDF using GPT 3.5",
+         page_icon="📰",
+         layout="centered",
+         initial_sidebar_state="expanded",
+     )
+
+     st.header("Talk to PDF files 📰", divider="rainbow")
+     st.subheader(
+         "Enjoy :red[talking] with :green[PDF] files using :sunglasses: OpenAI GPT 3.5 Turbo"
+     )
+
+     st.sidebar.title("Talk to PDF 📰")
+     st.sidebar.markdown(
+         "[Checkout the repository](https://github.com/ThivaV/chat_with_pdf_using_gpt)"
+     )
+     st.sidebar.markdown(
+         """
+ ### This is an LLM-powered chatbot, built using:
+
+ * [Streamlit](https://streamlit.io)
+ * [LangChain](https://python.langchain.com/)
+ * [OpenAI](https://platform.openai.com/docs/models)
+ ___
+ """
+     )
+
+     add_vertical_space(2)
+
+     openai_key = st.sidebar.text_input(label="Enter the OpenAI key 👇", type="password")
+
+     if not openai_key:
+         st.info("👈 :red[Please enter the OpenAI key] ⛔")
+         st.stop()
+
+     # set OPENAI_API_KEY in the environment
+     os.environ["OPENAI_API_KEY"] = openai_key
+
+     add_vertical_space(1)
+
+     upload_pdf_files = st.sidebar.file_uploader(
+         "Upload PDF files 📀", type="pdf", accept_multiple_files=True
+     )
+
+     if not upload_pdf_files:
+         st.info("👈 :red[Please upload PDF files] ⛔")
+         st.stop()
+
+     retriever = load_retriever(upload_pdf_files)
+
+     chat_history = StreamlitChatMessageHistory()
+
+     # init chat history memory
+     memory = ConversationBufferMemory(
+         memory_key="chat_history", chat_memory=chat_history, return_messages=True
+     )
+
+     llm = ChatOpenAI(
+         model_name="gpt-3.5-turbo",
+         openai_api_key=openai_key,
+         temperature=0,
+         streaming=True,
+     )
+
+     chain = ConversationalRetrievalChain.from_llm(
+         llm, retriever=retriever, memory=memory, verbose=False
+     )
+
+     # load previous chat history and
+     # re-draw it in the chat window
+     for message in chat_history.messages:
+         st.chat_message(message.type).write(message.content)
+
+     if prompt := st.chat_input("Ask questions"):
+         with st.chat_message("human"):
+             st.markdown(prompt)
+
+         response = chain.run(prompt)
+
+         with st.chat_message("ai"):
+             st.write(response)
+
+
+ if __name__ == "__main__":
+     # init streamlit
+     main()
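As a quick sanity check of the retrieval pipeline that `app.py` wires into Streamlit, the same components can be exercised headlessly. A minimal sketch, assuming the pinned versions in `requirements.txt`, a valid `OPENAI_API_KEY` in the environment, and a local `sample.pdf` (the file name and question are illustrative, not part of the commit):

```python
# Hypothetical smoke test mirroring the pipeline in app.py (not part of this commit).
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Load and chunk a local PDF with the same splitter settings as app.py.
docs = PyPDFLoader("sample.pdf").load()
chunks = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1500, chunk_overlap=200
).split_documents(docs)

# Build the FAISS retriever with the same search parameters.
retriever = FAISS.from_documents(chunks, OpenAIEmbeddings()).as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5, "k": 5},
)

# Wire up the conversational chain without the Streamlit chat history backend.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
    retriever=retriever,
    memory=memory,
)

print(chain.run("What is this document about?"))
```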
data/.gitkeep ADDED
File without changes
img/OpenAI_GPT3.5-Turbo.jpg ADDED
models/.gitkeep ADDED
File without changes
notebooks/.gitkeep ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ pypdf==4.0.2
+ langchain==0.1.9
+ streamlit==1.31.1
+ streamlit-extras==0.4.0
+ faiss-cpu==1.7.4
+ openai==1.12.0
+ tiktoken==0.6.0
+ langchain-openai==0.0.8
requirements_local.txt ADDED
@@ -0,0 +1,10 @@
+ ipykernel
+ ipywidgets
+ pypdf==4.0.2
+ langchain==0.1.9
+ streamlit==1.31.1
+ streamlit-extras==0.4.0
+ faiss-cpu==1.7.4
+ openai==1.12.0
+ tiktoken==0.6.0
+ langchain-openai==0.0.8
runtime.txt ADDED
@@ -0,0 +1 @@
+ python-3.9.0
src/.gitkeep ADDED
File without changes