import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from io import BytesIO

# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
model = AutoModelForCausalLM.from_pretrained(
    "himmeow/vi-gemma-2b-RAG",
    device_map="auto",          # Automatically place the model on GPU if available
    torch_dtype=torch.float16,  # Use FP16 for faster computation if supported
)

# Streamlit app layout
st.set_page_config(page_title="📄 PDF Query App", page_icon=":book:", layout="wide")
st.title("📄 PDF Query App")
st.sidebar.title("Upload File and Query")

# Sidebar: File Upload
uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type="pdf")

# Sidebar: Query Input
query = st.sidebar.text_input("Enter your query:")

# Sidebar: Submit Button
if st.sidebar.button("Submit"):
    if uploaded_file and query:
        # Read the PDF file
        pdf_text = ""
        with BytesIO(uploaded_file.read()) as file:
            reader = PdfReader(file)
            for page in reader.pages:
                text = page.extract_text()
                pdf_text += (text or "") + "\n"  # extract_text() can return None for image-only pages

        # Define the prompt format for the model
        prompt = f"""
{pdf_text}

Please answer the question: {query}
"""

        # Truncate the prompt if it's too long for the model
        max_input_length = 2048  # Adjust based on the model's max length
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)

        # Move the inputs to the GPU if available
        if torch.cuda.is_available():
            inputs = inputs.to("cuda")

        # Generate text using the model
        outputs = model.generate(
            **inputs,
            max_new_tokens=250,      # Reduce the number of tokens generated for faster results
            no_repeat_ngram_size=3,  # Prevent repetition
            num_beams=2,             # Use beam search with fewer beams for faster results
        )

        # Decode and display the results
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write(response)
    else:
        st.sidebar.warning("Please upload a PDF file and enter a query.")
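
# Note: the app above truncates any prompt longer than max_input_length tokens,
# so the end of a long PDF is silently dropped before generation. One possible
# workaround is to split the extracted text into overlapping chunks and query
# each chunk in turn. The sketch below is only illustrative: chunk_text and its
# default sizes are assumptions, not part of the original app or the
# vi-gemma-2b-RAG model.
def chunk_text(text: str, chunk_size: int = 1500, overlap: int = 200):
    """Split text into character-based chunks that share a small overlap."""
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + chunk_size])
        # Step forward by chunk_size - overlap so neighbouring chunks share context
        start += chunk_size - overlap
    return chunks

# Possible usage: build one prompt per chunk instead of one prompt for the whole PDF,
# e.g. `for chunk in chunk_text(pdf_text): ...`, then combine or rank the answers.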