vineeth N committed on
Commit
29d0fc0
1 Parent(s): 4f4392b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -22
app.py CHANGED
@@ -9,6 +9,7 @@ from langchain_community.document_loaders import PyPDFLoader
9
  from langchain.chains import RetrievalQA
10
  from langchain_openai import ChatOpenAI
11
  from langchain_openai import OpenAIEmbeddings
 
12
 
13
  # Load environment variables
14
  load_dotenv()
@@ -30,51 +31,51 @@ FAISS_INDEX_PATH = "faiss_index"
30
  FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
31
 
32
  @st.cache_resource
33
- def process_pdfs(directory: str) -> None:
34
- """Process all PDFs in the given directory and add them to the vector store."""
35
  global vector_store, pdf_files
36
- documents = []
 
 
 
 
37
 
38
- for filename in os.listdir(directory):
39
- if filename.endswith(".pdf"):
40
- file_path = os.path.join(directory, filename)
41
- loader = PyPDFLoader(file_path)
42
- documents.extend(loader.load())
43
- pdf_files[filename] = file_path
44
 
45
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
46
  texts = text_splitter.split_documents(documents)
47
 
48
- if os.path.exists(FAISS_INDEX_FILE):
49
- try:
50
- vector_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
51
- vector_store.add_documents(texts)
52
- except Exception as e:
53
- st.error(f"Error loading FAISS index: {e}")
54
- vector_store = FAISS.from_documents(texts, embeddings)
55
- else:
56
  vector_store = FAISS.from_documents(texts, embeddings)
 
 
57
 
58
  # Save the updated vector store
59
  if not os.path.exists(FAISS_INDEX_PATH):
60
  os.makedirs(FAISS_INDEX_PATH)
61
  vector_store.save_local(FAISS_INDEX_PATH)
62
 
 
 
 
63
  def main():
64
  st.title("PDF Question Answering System")
65
 
66
- # Process PDFs
67
- pdf_directory = "/path/to/your/pdf/directory" # Update this path
68
- process_pdfs(pdf_directory)
69
 
70
- st.success("PDFs processed. You can now ask questions!")
 
 
71
 
72
  # User input
73
  user_question = st.text_input("Ask a question about the PDFs:")
74
 
75
  if user_question:
76
  if vector_store is None:
77
- st.error("Error: Vector store not initialized.")
78
  return
79
 
80
  retriever = vector_store.as_retriever(search_kwargs={"k": 3})
 
9
  from langchain.chains import RetrievalQA
10
  from langchain_openai import ChatOpenAI
11
  from langchain_openai import OpenAIEmbeddings
12
+ import tempfile
13
 
14
  # Load environment variables
15
  load_dotenv()
 
31
  FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
32
 
33
  @st.cache_resource
34
+ def process_pdf(uploaded_file):
35
+ """Process the uploaded PDF and add it to the vector store."""
36
  global vector_store, pdf_files
37
+
38
+ # Create a temporary file to store the uploaded PDF
39
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
40
+ tmp_file.write(uploaded_file.getvalue())
41
+ tmp_file_path = tmp_file.name
42
 
43
+ loader = PyPDFLoader(tmp_file_path)
44
+ documents = loader.load()
45
+ pdf_files[uploaded_file.name] = tmp_file_path
 
 
 
46
 
47
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
48
  texts = text_splitter.split_documents(documents)
49
 
50
+ if vector_store is None:
 
 
 
 
 
 
 
51
  vector_store = FAISS.from_documents(texts, embeddings)
52
+ else:
53
+ vector_store.add_documents(texts)
54
 
55
  # Save the updated vector store
56
  if not os.path.exists(FAISS_INDEX_PATH):
57
  os.makedirs(FAISS_INDEX_PATH)
58
  vector_store.save_local(FAISS_INDEX_PATH)
59
 
60
+ # Clean up the temporary file
61
+ os.unlink(tmp_file_path)
62
+
63
  def main():
64
  st.title("PDF Question Answering System")
65
 
66
+ # File uploader
67
+ uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
 
68
 
69
+ if uploaded_file is not None:
70
+ process_pdf(uploaded_file)
71
+ st.success(f"PDF '{uploaded_file.name}' processed. You can now ask questions!")
72
 
73
  # User input
74
  user_question = st.text_input("Ask a question about the PDFs:")
75
 
76
  if user_question:
77
  if vector_store is None:
78
+ st.error("Error: No PDFs have been uploaded yet.")
79
  return
80
 
81
  retriever = vector_store.as_retriever(search_kwargs={"k": 3})