# Install dependencies first:
#   pip install transformers torch accelerate PyMuPDF streamlit

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import fitz  # PyMuPDF

# Load the tokenizer and model once and cache them across Streamlit reruns
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
    model = AutoModelForCausalLM.from_pretrained(
        "ricepaper/vi-gemma-2b-RAG",
        device_map="auto",          # accelerate places the weights on GPU/CPU automatically
        torch_dtype=torch.bfloat16,
    )
    # Note: with device_map="auto" the model is already dispatched by accelerate,
    # so do not call model.to(device) afterwards; doing so raises an error.
    return tokenizer, model

tokenizer, model = load_model()

# Function to read all text from an uploaded PDF file
def read_pdf(file):
    text = ""
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        for page in doc:
            text += page.get_text()
    return text

# Streamlit app
st.title("PDF Question Answering with vi-gemma-2b-RAG")
st.write("Upload a PDF file, and ask a question based on its content.")

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
question = st.text_input("Enter your question:")

if uploaded_file is not None and question:
    # Read PDF content
    pdf_text = read_pdf(uploaded_file)

    # Prepare the input for the model
    prompt_template = """
### Instruction and Input:
Based on the following context/documentation:
{}
Please answer the question: {}

### Response:
{}
"""
    input_text = prompt_template.format(pdf_text, question, "")
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate a response (inference_mode disables gradient tracking;
    # the original torch.cuda.amp.autocast would fail on CPU-only machines
    # and is redundant since the model is already loaded in bfloat16)
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            no_repeat_ngram_size=5,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt
    # (decoding outputs[0] in full would display the entire PDF text back to the user)
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )

    st.subheader("Answer:")
    st.write(response)
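To try the app locally, save the script to a file (app.py here is just an example name) and launch it with Streamlit's CLI:

    streamlit run app.py

Streamlit will open the interface in your browser, where you can upload a PDF and type a question. Keep in mind that the whole PDF text is placed into the prompt, so very long documents may exceed the model's context window; truncating pdf_text before formatting the prompt is a simple safeguard.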