datascientist22 committed
Commit 249d7ba
1 Parent(s): 62e64fb

Update app.py

Files changed (1)
  1. app.py +39 -35
app.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
 from PyPDF2 import PdfReader
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from io import BytesIO
 
 # Initialize the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
@@ -26,48 +27,51 @@ uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type="pdf")
 # Sidebar: Query Input
 query = st.sidebar.text_input("Enter your query:")
 
-# Handle file upload
-if uploaded_file and query:
-    # Read the PDF file
-    pdf_text = ""
-    with open(uploaded_file, "rb") as file:
-        reader = PdfReader(file)
-        for page_num in range(len(reader.pages)):
-            page = reader.pages[page_num]
-            text = page.extract_text()
-            pdf_text += text + "\n"
-
-    # Define the prompt format for the model
-    prompt = """
-    ### Instruction and Input:
-    Based on the following context/document:
-    {}
-    Please answer the question: {}
-
-    ### Response:
-    {}
-    """
-
-    # Format the input text
-    input_text = prompt.format(pdf_text, query, " ")
-
-    # Encode the input text into input ids
-    input_ids = tokenizer(input_text, return_tensors="pt")
-
-    # Use GPU for input ids if available
-    if torch.cuda.is_available():
-        input_ids = input_ids.to("cuda")
-
-    # Generate text using the model
-    outputs = model.generate(
-        **input_ids,
-        max_new_tokens=500,  # Limit the number of tokens generated
-        no_repeat_ngram_size=5,  # Prevent repetition of 5-gram phrases
-    )
-
-    # Decode and display the results
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    st.write(response)
+# Sidebar: Submit Button
+if st.sidebar.button("Submit"):
+    if uploaded_file and query:
+        # Read the PDF file
+        pdf_text = ""
+        with BytesIO(uploaded_file.read()) as file:
+            reader = PdfReader(file)
+            for page_num in range(len(reader.pages)):
+                page = reader.pages[page_num]
+                text = page.extract_text()
+                pdf_text += text + "\n"
+
+        # Define the prompt format for the model
+        prompt = """
+        ### Instruction and Input:
+        Based on the following context/document:
+        {}
+        Please answer the question: {}
+
+        ### Response:
+        {}
+        """
+
+        # Format the input text
+        input_text = prompt.format(pdf_text, query, " ")
+
+        # Encode the input text into input ids
+        input_ids = tokenizer(input_text, return_tensors="pt")
+
+        # Use GPU for input ids if available
+        if torch.cuda.is_available():
+            input_ids = input_ids.to("cuda")
+
+        # Generate text using the model
+        outputs = model.generate(
+            **input_ids,
+            max_new_tokens=500,  # Limit the number of tokens generated
+            no_repeat_ngram_size=5,  # Prevent repetition of 5-gram phrases
+        )
+
+        # Decode and display the results
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        st.write(response)
+    else:
+        st.sidebar.error("Please upload a PDF file and enter a query.")
 
 # Footer with LinkedIn link
 st.sidebar.write("---")
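Two behavior notes on this change. First, st.file_uploader returns an UploadedFile object (an in-memory, file-like buffer), not a filesystem path, so the old open(uploaded_file, "rb") call raised a TypeError; wrapping the uploaded bytes in BytesIO hands PdfReader the seekable binary stream it expects. Second, Streamlit reruns the whole script on every widget interaction, and st.sidebar.button("Submit") returns True only on the rerun triggered by its click, so generation now happens once per explicit submit instead of on every keystroke in the query box. A minimal sketch of the PDF-reading pattern in isolation (the extract_pdf_text helper and the sample.pdf path are illustrative, not part of the app):

from io import BytesIO
from PyPDF2 import PdfReader

def extract_pdf_text(data: bytes) -> str:
    """Concatenate the extracted text of every page of an in-memory PDF."""
    reader = PdfReader(BytesIO(data))  # PdfReader accepts any seekable binary stream
    # `or ""` guards pages that yield no extractable text
    return "\n".join(page.extract_text() or "" for page in reader.pages)

with open("sample.pdf", "rb") as f:  # stand-in for uploaded_file.read() in the app
    print(extract_pdf_text(f.read()))

Since UploadedFile itself supports read and seek, PdfReader(uploaded_file) would also have worked without the extra copy; the BytesIO wrap is the explicit form of the same idea.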
 
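One thing the new code leaves implicit: input_ids.to("cuda") moves only the input tensors, and model.generate will raise a device-mismatch error unless the model's weights are on the same device; the model-loading code sits outside this hunk, so that is not visible here. A sketch of the usual pattern with the same checkpoint, where the single device variable and the example prompt are illustrative assumptions, not lines from this app:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Pick one device and use it for both the weights and the inputs
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
model = AutoModelForCausalLM.from_pretrained("himmeow/vi-gemma-2b-RAG").to(device)

# BatchEncoding.to() moves every tensor in the encoding at once
inputs = tokenizer("Hello", return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=500, no_repeat_ngram_size=5)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Keeping both behind one device variable also lets the app fall back to CPU cleanly when no GPU is present.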