# Install dependencies first:
#   pip install transformers torch accelerate PyMuPDF streamlit

import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import fitz  # PyMuPDF

# Load the tokenizer and model once and cache them across Streamlit reruns
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
    model = AutoModelForCausalLM.from_pretrained(
        "ricepaper/vi-gemma-2b-RAG",
        device_map="auto",          # accelerate places the weights on GPU/CPU automatically
        torch_dtype=torch.bfloat16,
    )
    # Note: with device_map="auto" the model is already dispatched by accelerate,
    # so do not call model.to(device) afterwards; doing so raises an error.
    return tokenizer, model

tokenizer, model = load_model()

# Function to read all text from an uploaded PDF file
def read_pdf(file):
    text = ""
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        for page in doc:
            text += page.get_text()
    return text

# Streamlit app
st.title("PDF Question Answering with vi-gemma-2b-RAG")
st.write("Upload a PDF file, and ask a question based on its content.")

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
question = st.text_input("Enter your question:")

if uploaded_file is not None and question:
    # Read PDF content
    pdf_text = read_pdf(uploaded_file)

    # Prepare the input for the model
    prompt_template = """
### Instruction and Input:
Based on the following context/documentation:
{}
Please answer the question: {}

### Response:
{}
"""
    input_text = prompt_template.format(pdf_text, question, "")
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate a response (inference_mode disables gradient tracking;
    # the original torch.cuda.amp.autocast would fail on CPU-only machines
    # and is redundant since the model is already loaded in bfloat16)
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            no_repeat_ngram_size=5,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt
    # (decoding outputs[0] in full would display the entire PDF text back to the user)
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )

    st.subheader("Answer:")
    st.write(response)
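To try the app locally, save the script to a file (app.py here is just an example name) and launch it with Streamlit's CLI:

    streamlit run app.py

Streamlit will open the interface in your browser, where you can upload a PDF and type a question. Keep in mind that the whole PDF text is placed into the prompt, so very long documents may exceed the model's context window; truncating pdf_text before formatting the prompt is a simple safeguard.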