# RAG_Chat_Bot / app.py
# Requires: pip install transformers torch accelerate PyMuPDF streamlit
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import fitz # PyMuPDF
# Load the tokenizer and model (cached so they are only loaded once per server process)
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
    model = AutoModelForCausalLM.from_pretrained(
        "ricepaper/vi-gemma-2b-RAG",
        device_map="auto",          # let accelerate place the weights on GPU or CPU
        torch_dtype=torch.bfloat16,
    )
    # device_map="auto" already handles placement, so no manual model.to(device)
    # call is needed (moving an accelerate-dispatched model can raise an error).
    return tokenizer, model

tokenizer, model = load_model()
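
# Note (assumption, not from the original app): with device_map="auto", accelerate
# decides where the weights live; `model.hf_device_map` shows the placement if you
# need to confirm whether the model landed on the GPU.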
# Read all text from an uploaded PDF file
def read_pdf(file):
    text = ""
    # Open the uploaded bytes as an in-memory PDF document
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        for page in doc:
            text += page.get_text()
    return text
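
# Optional sketch (not wired into the app): the prompt below embeds the entire PDF
# text, and vi-gemma-2b-RAG has a limited context window, so very long documents
# may overflow it. A simple guard is to truncate the extracted text by token count
# before building the prompt. The helper name and the 4000-token budget are
# illustrative assumptions, not part of the original app.
def truncate_to_token_budget(text, tokenizer, max_tokens=4000):
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    if len(token_ids) <= max_tokens:
        return text
    # Keep only the first max_tokens tokens and turn them back into text
    return tokenizer.decode(token_ids[:max_tokens])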
# Streamlit app
st.title("PDF Question Answering with vi-gemma-2b-RAG")
st.write("Upload a PDF file, and ask a question based on its content.")
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
question = st.text_input("Enter your question:")
if uploaded_file is not None and question:
    # Read PDF content
    pdf_text = read_pdf(uploaded_file)

    # Prepare the input for the model
    prompt_template = """
### Instruction and Input:
Based on the following context/documentation:
{}
Please answer the question: {}
### Response:
{}
"""
    input_text = prompt_template.format(pdf_text, question, "")
    input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Generate a response (no gradients are needed for inference)
    with torch.inference_mode():
        outputs = model.generate(
            **input_ids,
            max_new_tokens=200,
            no_repeat_ngram_size=5
        )
    # Decode only the newly generated tokens so the answer does not echo the prompt
    response = tokenizer.decode(
        outputs[0][input_ids["input_ids"].shape[-1]:],
        skip_special_tokens=True
    )
    st.subheader("Answer:")
    st.write(response)
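
# To run the app locally (with the packages listed at the top installed):
#   streamlit run app.py
# The first run downloads the ricepaper/vi-gemma-2b-RAG weights from the
# Hugging Face Hub, so it can take a few minutes before the UI responds.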