Commit df61c4d by Waseem7711
Parent(s): 1386537

Update app.py

Files changed (1):
  app.py +27 -62

app.py CHANGED
@@ -1,49 +1,3 @@
- Hugging Face's logo
- Hugging Face
- Search models, datasets, users...
- Models
- Datasets
- Spaces
- Posts
- Docs
- Pricing
-
-
-
- Spaces:
-
- Waseem7711
- /
- RAG_Chat_Bot
-
-
- like
- 0
-
- App
- Files
- Community
- Settings
- RAG_Chat_Bot
- /
- app.py
-
- Waseem7711's picture
- Waseem7711
- Update app.py
- 43c74e3
- verified
- 12 minutes ago
- raw
-
- Copy download link
- history
- blame
- edit
- delete
- No virus
-
- 2.21 kB
  import streamlit as st
  from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
@@ -52,6 +6,7 @@ import fitz  # PyMuPDF for PDF handling
  # Load the model and tokenizer
  @st.cache_resource
  def load_model():
+     # Load the tokenizer and model
      tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
      model = AutoModelForCausalLM.from_pretrained(
          "himmeow/vi-gemma-2b-RAG",
@@ -64,6 +19,7 @@ def load_model():
 
  # Function to extract text from PDF
  def extract_text_from_pdf(pdf_file):
+     # Extract text from the uploaded PDF file using PyMuPDF
      doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
      text = ""
      for page_num in range(doc.page_count):
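The loop body is cut off by the hunk. With PyMuPDF the usual pattern is to load each page and append its get_text() output; a minimal sketch of the same idea against a file on disk (the path-based variant is illustrative, not the app's code):

import fitz  # PyMuPDF

def extract_text_from_path(path):
    # Open a PDF from disk instead of an uploaded stream
    doc = fitz.open(path)
    text = ""
    for page_num in range(doc.page_count):
        text += doc.load_page(page_num).get_text()
    doc.close()
    return text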
@@ -73,45 +29,54 @@ def extract_text_from_pdf(pdf_file):
 
  # Function to generate response from model
  def generate_response(input_text, query, tokenizer, model):
-     prompt = """
+     # Format the input prompt for the model
+     prompt = f"""
      ### Instruction and Input:
      Based on the following context/document:
-     {}
-     Please answer the question: {}
+     {input_text}
+     Please answer the question: {query}
+
      ### Response:
-     {}
      """
-     formatted_input = prompt.format(input_text, query, " ")
-     input_ids = tokenizer(formatted_input, return_tensors="pt")
+     input_ids = tokenizer(prompt, return_tensors="pt")
      if torch.cuda.is_available():
          input_ids = input_ids.to("cuda")
+     # Generate a response from the model
      outputs = model.generate(
          **input_ids,
          max_new_tokens=500,
          no_repeat_ngram_size=5
      )
+     # Decode the generated output into readable text
      return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
- # Streamlit app
+ # Streamlit app main function
  def main():
      st.title("PDF Question Answering with vi-gemma-2b-RAG")
-
+
+     # File uploader widget for PDF files
      pdf_file = st.file_uploader("Upload a PDF file", type=["pdf"])
-
+
      if pdf_file is not None:
          with st.spinner("Reading the PDF..."):
+             # Extract text from the uploaded PDF
              pdf_text = extract_text_from_pdf(pdf_file)
 
          st.text_area("Extracted Text", pdf_text, height=300)
-
+
+         # Text input for the user's question
          query = st.text_input("Enter your question:")
-
+
          if st.button("Get Answer"):
-             with st.spinner("Generating response..."):
-                 tokenizer, model = load_model()
-                 response = generate_response(pdf_text, query, tokenizer, model)
-                 st.text_area("Response", response, height=200)
+             if query.strip() == "":
+                 st.warning("Please enter a question.")
+             else:
+                 with st.spinner("Generating response..."):
+                     # Load the model and tokenizer
+                     tokenizer, model = load_model()
+                     # Generate the response using the model
+                     response = generate_response(pdf_text, query, tokenizer, model)
+                     st.text_area("Response", response, height=200)
 
  if __name__ == "__main__":
      main()
-
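With the f-string prompt in place, the generation path can be smoke-tested outside Streamlit. A minimal sketch, assuming app.py is importable and its dependencies are installed:

# Exercise the prompt/generation path without the UI
from app import load_model, generate_response

tokenizer, model = load_model()
context = "Gemma is a family of lightweight open models."  # stand-in document text
print(generate_response(context, "What is Gemma?", tokenizer, model))

Note that tokenizer.decode(outputs[0], ...) returns the prompt followed by the completion, so the printed text repeats the context; slicing the output tensor past the input length would isolate just the answer. The app itself is served with streamlit run app.py.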