# Spaces status at capture time: Runtime error
# File size: 3,888 Bytes
# Revision: d498edb (presumably the commit hash shown by the file viewer)
# import os
import os
import tempfile
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from streamlit_extras.add_vertical_space import add_vertical_space
@st.cache_resource(ttl="1h")
def load_retriever(pdf_files):
    """Build a FAISS-backed retriever from uploaded PDF files.

    Each upload is written to a temporary directory so that ``PyPDFLoader``
    can read it from disk.  The loaded pages are split into chunks with a
    tiktoken-based ``RecursiveCharacterTextSplitter`` (chunk size 1500,
    overlap 200), embedded with ``OpenAIEmbeddings`` and indexed with FAISS.
    The result is cached through ``st.cache_resource`` with ``ttl="1h"``.

    Args:
        pdf_files: Iterable of uploaded file objects exposing a ``name``
            attribute and a ``getvalue()`` method returning the PDF bytes.

    Returns:
        The retriever produced by ``FAISS.as_retriever`` configured with
        ``search_type="similarity_score_threshold"``, ``score_threshold=0.5``
        and ``k=5``.

    Raises:
        ValueError: If splitting the uploaded files produced no chunks.
    """
    docs = []
    # The context manager removes the directory (and the copied PDFs) as soon
    # as loading is finished, instead of leaving cleanup to garbage collection.
    with tempfile.TemporaryDirectory() as temp_dir:
        for pdf_file in pdf_files:
            # Keep only the final path component so an upload name containing
            # directory parts cannot point outside the temporary directory.
            file_name = os.path.basename(pdf_file.name)
            temp_pdf_file_path = os.path.join(temp_dir, file_name)
            with open(temp_pdf_file_path, "wb") as f:
                f.write(pdf_file.getvalue())
            # Load inside the ``with`` block: the loader needs the file on disk.
            loader = PyPDFLoader(temp_pdf_file_path)
            docs.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1500, chunk_overlap=200
    )
    chunks = text_splitter.split_documents(docs)
    if not chunks:
        # Fail early with a clear message instead of handing an empty list
        # to the vector store.
        raise ValueError("No text could be extracted from the uploaded PDF files.")

    # embeddings
    # NOTE(review): OpenAIEmbeddings() is created without arguments, so it
    # presumably picks the API key up from the OPENAI_API_KEY environment
    # variable that main() sets before calling this function -- confirm.
    embeddings = OpenAIEmbeddings()
    vector_db = FAISS.from_documents(chunks, embeddings)
    retriever = vector_db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.5, "k": 5},
    )
    return retriever
def main():
    """Render the "Talk to PDF" Streamlit page and answer chat questions.

    Flow, top to bottom:

    1. Configure the page and draw the header and the sidebar description.
    2. Ask for the OpenAI key in the sidebar; stop the script run until one
       is entered.
    3. Ask for one or more PDF uploads; stop the script run until at least
       one file is provided.
    4. Build (or fetch from cache) the retriever, wire it together with a
       chat-history-backed memory and a ``ChatOpenAI`` model into a
       ``ConversationalRetrievalChain``.
    5. Re-draw the stored chat history and, when the user submits a
       question, show the question and the chain's answer.

    Returns:
        None. All output is rendered through Streamlit.
    """
    st.set_page_config(
        page_title="Talk to PDF using GPT 3.5",
        page_icon="π°",
        layout="centered",
        initial_sidebar_state="expanded",
    )
    st.header("Talk to PDF files π°", divider="rainbow")
    st.subheader(
        "Enjoy :red[talking] with :green[PDF] files using :sunglasses: OpenAI GPT 3.5 Turbo"
    )

    st.sidebar.title("Talk to PDF π°")
    st.sidebar.markdown(
        "[Checkout the repository](https://github.com/ThivaV/chat_with_pdf_using_gpt)"
    )
    st.sidebar.markdown(
        """
### This is a LLM powered chatbot, built using:
* [Streamlit](https://streamlit.io)
* [LangChain](https://python.langchain.com/)
* [OpenAI](https://platform.openai.com/docs/models)
___
"""
    )

    # NOTE(review): add_vertical_space is called outside any `with st.sidebar`
    # block, so the spacer presumably lands in the main area rather than the
    # sidebar -- confirm this is the intended placement.
    add_vertical_space(2)

    openai_key = st.sidebar.text_input(label="Enter the OpenAI key π", type="password")
    if not openai_key:
        st.info("π :red[Please enter the OpenAI key] β")
        st.stop()

    # set the OPENAI_API_KEY to environment
    # (load_retriever builds OpenAIEmbeddings() without passing a key)
    os.environ["OPENAI_API_KEY"] = openai_key

    add_vertical_space(1)

    upload_pdf_files = st.sidebar.file_uploader(
        "Upload a pdf files π€", type="pdf", accept_multiple_files=True
    )
    if not upload_pdf_files:
        st.info("π :red[Please upload pdf files] β")
        st.stop()

    retriever = load_retriever(upload_pdf_files)

    chat_history = StreamlitChatMessageHistory()

    # init chat history memory
    memory = ConversationBufferMemory(
        memory_key="chat_history", chat_memory=chat_history, return_messages=True
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=openai_key,
        temperature=0,
        streaming=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=retriever, memory=memory, verbose=False
    )

    # load previous chat history
    # re-draw the chat history in the chat window
    for message in chat_history.messages:
        st.chat_message(message.type).write(message.content)

    if prompt := st.chat_input("Ask questions"):
        with st.chat_message("human"):
            st.markdown(prompt)

        # Chain.run() is deprecated in LangChain; invoke() takes the inputs
        # as a dict and returns a dict of outputs. The memory supplies
        # "chat_history", so only "question" has to be passed here.
        result = chain.invoke({"question": prompt})
        response = result["answer"]

        with st.chat_message("ai"):
            st.write(response)
if __name__ == "__main__":
    # Entry point: build and render the Streamlit page when this file is
    # executed as a script rather than imported.
    main()
|