# RAG_Chat_Bot / app.py
# Requires: pip install transformers torch accelerate PyMuPDF streamlit
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import fitz # PyMuPDF
# Load the tokenizer and model (cached so they are only loaded once per server process)
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
    model = AutoModelForCausalLM.from_pretrained(
        "ricepaper/vi-gemma-2b-RAG",
        device_map="auto",          # let accelerate place the weights on GPU or CPU
        torch_dtype=torch.bfloat16,
    )
    # device_map="auto" already handles placement, so no manual model.to(device)
    # call is needed (moving an accelerate-dispatched model can raise an error).
    return tokenizer, model

tokenizer, model = load_model()
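
# Note (assumption, not from the original app): with device_map="auto", accelerate
# decides where the weights live; `model.hf_device_map` shows the placement if you
# need to confirm whether the model landed on the GPU.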
# Read all text from an uploaded PDF file
def read_pdf(file):
    text = ""
    # Open the uploaded bytes as an in-memory PDF document
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        for page in doc:
            text += page.get_text()
    return text
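
# Optional sketch (not wired into the app): the prompt below embeds the entire PDF
# text, and vi-gemma-2b-RAG has a limited context window, so very long documents
# may overflow it. A simple guard is to truncate the extracted text by token count
# before building the prompt. The helper name and the 4000-token budget are
# illustrative assumptions, not part of the original app.
def truncate_to_token_budget(text, tokenizer, max_tokens=4000):
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    if len(token_ids) <= max_tokens:
        return text
    # Keep only the first max_tokens tokens and turn them back into text
    return tokenizer.decode(token_ids[:max_tokens])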
# Streamlit app
st.title("PDF Question Answering with vi-gemma-2b-RAG")
st.write("Upload a PDF file, and ask a question based on its content.")
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
question = st.text_input("Enter your question:")
if uploaded_file is not None and question:
    # Read PDF content
    pdf_text = read_pdf(uploaded_file)

    # Prepare the input for the model
    prompt_template = """
### Instruction and Input:
Based on the following context/documentation:
{}
Please answer the question: {}
### Response:
{}
"""
    input_text = prompt_template.format(pdf_text, question, "")
    input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Generate a response (no gradients are needed for inference)
    with torch.inference_mode():
        outputs = model.generate(
            **input_ids,
            max_new_tokens=200,
            no_repeat_ngram_size=5
        )
    # Decode only the newly generated tokens so the answer does not echo the prompt
    response = tokenizer.decode(
        outputs[0][input_ids["input_ids"].shape[-1]:],
        skip_special_tokens=True
    )
    st.subheader("Answer:")
    st.write(response)
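
# To run the app locally (with the packages listed at the top installed):
#   streamlit run app.py
# The first run downloads the ricepaper/vi-gemma-2b-RAG weights from the
# Hugging Face Hub, so it can take a few minutes before the UI responds.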