Spaces:
Runtime error
Runtime error
init commit
Browse files- .github/sync_to_huggingface_hub.yml +20 -0
- README.md +25 -2
- app.py +137 -0
- data/.gitkeep +0 -0
- img/OpenAI_GPT3.5-Turbo.jpg +0 -0
- models/.gitkeep +0 -0
- notebooks/.gitkeep +0 -0
- requirements.txt +8 -0
- requirements_local.txt +10 -0
- runtime.txt +1 -0
- src/.gitkeep +0 -0
.github/sync_to_huggingface_hub.yml
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Mirror this repository to the HuggingFace Space on every push to main.
name: Sync to HuggingFace Space
on:
  push:
    branches: [main]

  # to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Full history + LFS objects are required for a clean force-push mirror.
          fetch-depth: 0
          lfs: true
      - name: Push to hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        # NOTE(fix): the password must be the *expansion* of the token
        # ($HF_TOKEN), not the literal string "HF_TOKEN" — otherwise the
        # push authenticates with a bogus credential and fails.
        run: git push --force https://thivav:[email protected]/spaces/thivav/chat_with_pdf_using_gpt main
|
README.md
CHANGED
@@ -1,2 +1,25 @@
|
|
1 |
-
|
2 |
-
Chat
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Chat With Pdf Using Gpt
|
3 |
+
emoji: π
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: pink
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.31.1
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
![OpenAI GPT 3.5](/img/OpenAI_GPT3.5-Turbo.jpg)
|
13 |
+
|
14 |
+
# Chat with pdf using OpenAI GPT 3.5 π’
|
15 |
+
|
16 |
+
#GPT3.5 | #OpenAIEmbeddings | #PDF | #Streamlit | #FAISS
|
17 |
+
|
18 |
+
Chat with pdf using [GPT 3.5 Turbo](https://platform.openai.com/docs/models/gpt-3-5-turbo)
|
19 |
+
|
20 |
+
- [GPT-3.5 Turbo](https://platform.openai.com/docs/models/gpt-3-5-turbo)
|
21 |
+
- [GPT-3.5 Turbo fine-tuning and API updates](https://openai.com/blog/gpt-3-5-turbo-fine-tuning-and-api-updates)
|
22 |
+
- [Streamlit Chat Message History](https://python.langchain.com/docs/integrations/memory/streamlit_chat_message_history)
|
23 |
+
- [FAISS](https://python.langchain.com/docs/integrations/vectorstores/faiss)
|
24 |
+
|
25 |
+
[Chat with PDF using OpenAI GPT 3.5 Turbo - Playground](https://huggingface.co/spaces/thivav/chat_with_pdf_using_gpt)
|
app.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import os
|
2 |
+
import os
|
3 |
+
import tempfile
|
4 |
+
|
5 |
+
import streamlit as st
|
6 |
+
from langchain.chains import ConversationalRetrievalChain
|
7 |
+
from langchain.memory import ConversationBufferMemory
|
8 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
9 |
+
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
|
10 |
+
from langchain_community.document_loaders import PyPDFLoader
|
11 |
+
from langchain_community.vectorstores import FAISS
|
12 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
13 |
+
from streamlit_extras.add_vertical_space import add_vertical_space
|
14 |
+
|
15 |
+
|
16 |
+
@st.cache_resource(ttl="1h")
def load_retriever(pdf_files):
    """Build a FAISS retriever over the uploaded PDF files.

    Cached as a Streamlit resource for one hour so repeated reruns with the
    same uploads do not re-embed the documents.

    Args:
        pdf_files: list of Streamlit ``UploadedFile`` objects (PDFs).

    Returns:
        A retriever over the embedded chunks using similarity-score-threshold
        search (threshold 0.5, top 5 results).
    """

    docs = []
    # PyPDFLoader reads from disk, so write each upload into a temporary
    # directory first. Using the context manager guarantees the directory is
    # removed when loading finishes (the original leaked it until garbage
    # collection finalized the TemporaryDirectory object).
    with tempfile.TemporaryDirectory() as temp_dir:
        for pdf_file in pdf_files:
            temp_pdf_file_path = os.path.join(temp_dir, pdf_file.name)

            with open(temp_pdf_file_path, "wb") as f:
                f.write(pdf_file.getvalue())

            loader = PyPDFLoader(temp_pdf_file_path)
            docs.extend(loader.load())

    # Token-aware splitting keeps each chunk within the embedding model's
    # context budget; the 200-token overlap preserves cross-chunk context.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1500, chunk_overlap=200
    )
    chunks = text_splitter.split_documents(docs)

    # Embed the chunks with OpenAI and index them in an in-memory FAISS store.
    embeddings = OpenAIEmbeddings()
    vector_db = FAISS.from_documents(chunks, embeddings)

    retriever = vector_db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.5, "k": 5},
    )

    return retriever
|
47 |
+
|
48 |
+
|
49 |
+
def main():
    """Streamlit entry point: chat with uploaded PDF files via GPT-3.5 Turbo.

    Renders the page and sidebar (repo link, OpenAI key input, PDF uploader),
    builds a conversational retrieval chain over the uploaded PDFs, and runs
    the chat loop. Chat history is kept in Streamlit session state so it
    survives reruns.
    """

    st.set_page_config(
        page_title="Talk to PDF using GPT 3.5",
        page_icon="π°",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    st.header("Talk to PDF files π°", divider="rainbow")
    st.subheader(
        "Enjoy :red[talking] with :green[PDF] files using :sunglasses: OpenAI GPT 3.5 Turbo"
    )

    st.sidebar.title("Talk to PDF π°")
    st.sidebar.markdown(
        "[Checkout the repository](https://github.com/ThivaV/chat_with_pdf_using_gpt)"
    )
    st.sidebar.markdown(
        """
        ### This is a LLM powered chatbot, built using:

        * [Streamlit](https://streamlit.io)
        * [LangChain](https://python.langchain.com/)
        * [OpenAI](https://platform.openai.com/docs/models)
        ___
        """
    )

    add_vertical_space(2)

    openai_key = st.sidebar.text_input(label="Enter the OpenAI key π", type="password")

    # Guard clause: nothing below can work without an API key.
    if not openai_key:
        st.info("π :red[Please enter the OpenAI key] β")
        st.stop()

    # Expose the key via the environment for libraries (e.g. OpenAIEmbeddings
    # inside load_retriever) that read OPENAI_API_KEY themselves.
    os.environ["OPENAI_API_KEY"] = openai_key

    add_vertical_space(1)

    upload_pdf_files = st.sidebar.file_uploader(
        "Upload a pdf files π€", type="pdf", accept_multiple_files=True
    )

    # Guard clause: wait until at least one PDF has been uploaded.
    if not upload_pdf_files:
        st.info("π :red[Please upload pdf files] β")
        st.stop()

    retriever = load_retriever(upload_pdf_files)

    # History is stored in Streamlit session state so it persists across reruns.
    chat_history = StreamlitChatMessageHistory()

    # init chat history memory
    memory = ConversationBufferMemory(
        memory_key="chat_history", chat_memory=chat_history, return_messages=True
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=openai_key,
        temperature=0,  # deterministic answers
        streaming=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=retriever, memory=memory, verbose=False
    )

    # Re-draw the stored chat history in the chat window after each rerun.
    for message in chat_history.messages:
        st.chat_message(message.type).write(message.content)

    if prompt := st.chat_input("Ask questions"):
        with st.chat_message("human"):
            st.markdown(prompt)

        # NOTE(fix): `Chain.run` is deprecated as of LangChain 0.1 (the
        # pinned version here is 0.1.9); `invoke` is the supported API.
        # ConversationalRetrievalChain returns a mapping whose "answer" key
        # holds the response text.
        response = chain.invoke({"question": prompt})["answer"]

        with st.chat_message("ai"):
            st.write(response)
133 |
+
|
134 |
+
|
135 |
+
if __name__ == "__main__":
    # Streamlit executes this module as a script on every rerun; delegate
    # everything to main().
    main()
|
data/.gitkeep
ADDED
File without changes
|
img/OpenAI_GPT3.5-Turbo.jpg
ADDED
models/.gitkeep
ADDED
File without changes
|
notebooks/.gitkeep
ADDED
File without changes
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pypdf==4.0.2
|
2 |
+
langchain==0.1.9
|
3 |
+
streamlit==1.31.1
|
4 |
+
streamlit-extras==0.4.0
|
5 |
+
faiss-cpu==1.7.4
|
6 |
+
openai==1.12.0
|
7 |
+
tiktoken==0.6.0
|
8 |
+
langchain-openai==0.0.8
|
requirements_local.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ipykernel
|
2 |
+
ipywidgets
|
3 |
+
pypdf==4.0.2
|
4 |
+
langchain==0.1.9
|
5 |
+
streamlit==1.31.1
|
6 |
+
streamlit-extras==0.4.0
|
7 |
+
faiss-cpu==1.7.4
|
8 |
+
openai==1.12.0
|
9 |
+
tiktoken==0.6.0
|
10 |
+
langchain-openai==0.0.8
|
runtime.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python-3.9.0
|
src/.gitkeep
ADDED
File without changes
|