Spaces:

vineethn
/

qna

Running

qna / app.py

vineeth N

Update app.py

29d0fc0 verified 5 days ago

4.23 kB

	import os
	import streamlit as st
	from typing import List
	from dotenv import load_dotenv
	from langchain_community.embeddings import OpenAIEmbeddings
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_community.vectorstores import FAISS
	from langchain_community.document_loaders import PyPDFLoader
	from langchain.chains import RetrievalQA
	from langchain_openai import ChatOpenAI
	from langchain_openai import OpenAIEmbeddings
	import tempfile

	# Read variables from a .env file (via python-dotenv) before they are looked up.
	load_dotenv()

	# Directory the FAISS index is saved to, and the index file inside it.
	FAISS_INDEX_PATH = "faiss_index"
	FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")

	# OpenAI credentials; None when OPENAI_API_KEY is not set.
	openai_api_key = os.environ.get('OPENAI_API_KEY')

	# Embedding model shared by every vector-store operation in this module.
	embeddings = OpenAIEmbeddings(
	    model="text-embedding-3-small",
	    openai_api_key=openai_api_key,
	)

	# Vector store holding the indexed chunks; stays None until a PDF is processed.
	vector_store = None

	# Uploaded file name -> path of the temporary copy that was parsed.
	pdf_files = dict()

	# Deliberately NOT wrapped in st.cache_resource: this function works through
	# module globals, and a cache hit would skip the body and leave the globals
	# (reset on every Streamlit rerun) empty.
	def process_pdf(uploaded_file):
	    """Index an uploaded PDF into the module-level FAISS vector store.

	    The upload is written to a temporary file, parsed with ``PyPDFLoader``,
	    split into overlapping chunks, embedded and added to ``vector_store``;
	    the index is then saved under ``FAISS_INDEX_PATH``.

	    Streamlit re-executes the script on each interaction, which resets the
	    module globals. The store, the name-to-path map and the digests of the
	    uploads already indexed are therefore kept in ``st.session_state`` and
	    copied back into the globals on every call, so a given file is embedded
	    only once per session and stays queryable on later reruns.

	    Args:
	        uploaded_file: Object returned by ``st.file_uploader``; only its
	            ``name`` attribute and ``getvalue()`` method are used.

	    Returns:
	        None. Results are published through the ``vector_store`` and
	        ``pdf_files`` globals.
	    """
	    global vector_store, pdf_files
	    import hashlib  # local import: only needed to fingerprint uploads

	    state = st.session_state

	    # Restore whatever earlier reruns of this session already built.
	    if "qna_vector_store" in state:
	        vector_store = state["qna_vector_store"]
	    if "qna_pdf_files" in state:
	        pdf_files.update(state["qna_pdf_files"])
	    if "qna_indexed_digests" not in state:
	        state["qna_indexed_digests"] = set()
	    indexed_digests = state["qna_indexed_digests"]

	    pdf_bytes = uploaded_file.getvalue()
	    digest = hashlib.sha256(pdf_bytes).hexdigest()

	    # Same content already embedded in this session: nothing more to do.
	    if vector_store is not None and digest in indexed_digests:
	        return

	    # PyPDFLoader needs a filesystem path, so spill the upload to disk.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
	        tmp_file.write(pdf_bytes)
	        tmp_file_path = tmp_file.name

	    try:
	        documents = PyPDFLoader(tmp_file_path).load()
	    finally:
	        # Remove the temporary copy even when parsing fails.
	        os.unlink(tmp_file_path)

	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	    texts = text_splitter.split_documents(documents)

	    # A PDF without extractable text yields no chunks, and an empty chunk
	    # list gives FAISS nothing to build an index from.
	    if not texts:
	        st.warning(f"No extractable text found in '{uploaded_file.name}'; nothing was indexed.")
	        return

	    if vector_store is None:
	        vector_store = FAISS.from_documents(texts, embeddings)
	    else:
	        vector_store.add_documents(texts)

	    # Save the updated vector store; exist_ok avoids the check-then-create race.
	    os.makedirs(FAISS_INDEX_PATH, exist_ok=True)
	    vector_store.save_local(FAISS_INDEX_PATH)

	    # Chunk metadata carries the temporary path as its source, so remember
	    # which upload that path belonged to.
	    pdf_files[uploaded_file.name] = tmp_file_path
	    indexed_digests.add(digest)

	    # Persist for the next rerun of this session.
	    state["qna_vector_store"] = vector_store
	    state["qna_pdf_files"] = dict(pdf_files)
	    state["qna_indexed_digests"] = indexed_digests

	def main():
	    """Render the Streamlit page: upload a PDF, ask a question, show the answer.

	    An uploaded file is handed to ``process_pdf``. When a question is entered
	    and the module-level ``vector_store`` is populated, the three most similar
	    chunks are retrieved and passed to a ``RetrievalQA`` "stuff" chain; the
	    answer and the documents it was drawn from are then displayed.

	    Returns:
	        None. All output goes through Streamlit widgets.
	    """
	    st.title("PDF Question Answering System")

	    # File uploader
	    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

	    if uploaded_file is not None:
	        process_pdf(uploaded_file)
	        st.success(f"PDF '{uploaded_file.name}' processed. You can now ask questions!")

	    # User input
	    user_question = st.text_input("Ask a question about the PDFs:")

	    # Nothing to answer yet (blank or whitespace-only input).
	    if not user_question or not user_question.strip():
	        return

	    if vector_store is None:
	        st.error("Error: No PDFs have been uploaded yet.")
	        return

	    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

	    # Initialize the OpenAI language model
	    llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)

	    qa_chain = RetrievalQA.from_chain_type(
	        llm=llm,
	        chain_type="stuff",
	        retriever=retriever,
	        return_source_documents=True,
	    )

	    # Calling the chain object directly is deprecated; invoke() with the
	    # chain's "query" input key is the supported form.
	    result = qa_chain.invoke({"query": user_question})
	    answer = result['result']
	    source_docs = result['source_documents']

	    st.write("Answer:", answer)

	    if not source_docs:
	        return

	    st.subheader("Sources:")

	    # pdf_files maps upload name -> path the loader read, and chunk metadata
	    # may carry that path rather than the upload name, so resolve both ways.
	    upload_name_by_temp = {
	        os.path.basename(path): name for name, path in pdf_files.items()
	    }

	    shown = set()
	    other_sources = []  # list, not set: keeps the display order stable
	    for doc in source_docs:
	        source = doc.metadata.get('source')
	        if not source:
	            continue

	        base_name = os.path.basename(source)
	        if base_name in pdf_files:
	            file_name = base_name
	        else:
	            file_name = upload_name_by_temp.get(base_name)

	        if file_name is None:
	            if source not in other_sources:
	                other_sources.append(source)
	            continue

	        if file_name in shown:
	            continue
	        shown.add(file_name)

	        st.write(f"Source: {file_name}")

	        # The recorded path may already have been deleted; only offer a
	        # download when the file is still on disk.
	        file_path = pdf_files[file_name]
	        if os.path.isfile(file_path):
	            with open(file_path, "rb") as file:
	                st.download_button(
	                    label=f"Download {file_name}",
	                    data=file,
	                    file_name=file_name,
	                    mime="application/pdf",
	                )

	    if other_sources:
	        st.subheader("Other Sources:")
	        for source in other_sources:
	            st.write(f"- {source}")


	if __name__ == "__main__":
	    main()