# Page banner captured along with the source (not program text): "Spaces: Sleeping / Sleeping"
from io import BytesIO

import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hugging Face model id shared by the tokenizer and the causal language model.
MODEL_NAME = "himmeow/vi-gemma-2b-RAG"

# Prompt layout sent to the model. Slots, in order: document text, user
# question, response placeholder (filled with a single space before generation).
PROMPT_TEMPLATE = """
### Instruction and Input:
Based on the following context/document:
{}
Please answer the question: {}
### Response:
{}
"""

# Generation limits passed to ``model.generate``.
MAX_NEW_TOKENS = 500  # Limit the number of tokens generated
NO_REPEAT_NGRAM_SIZE = 5  # Prevent repetition of 5-gram phrases


def load_model():
    """Load the tokenizer and causal LM identified by ``MODEL_NAME``.

    The model is requested with ``device_map="auto"`` and bfloat16 weights.
    Device placement is left to that setting, so the model is not moved
    again with ``.to("cuda")`` after loading.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    return tokenizer, model


def extract_pdf_text(pages):
    """Concatenate the text of every page, each followed by a newline.

    Args:
        pages: Iterable of objects exposing ``extract_text()`` (for example
            ``PdfReader(...).pages``).

    Returns:
        The joined text. A page whose ``extract_text()`` yields ``None`` or
        an empty string contributes only its trailing newline.
    """
    # ``or ""`` guards against pages with no extractable text, which would
    # otherwise break the string concatenation.
    return "".join(f"{page.extract_text() or ''}\n" for page in pages)


def build_prompt(context: str, query: str) -> str:
    """Fill ``PROMPT_TEMPLATE`` with the document text and the question.

    Args:
        context: Text extracted from the uploaded document.
        query: The user's question.

    Returns:
        The prompt string; the response slot is filled with a single space.
    """
    return PROMPT_TEMPLATE.format(context, query, " ")


def generate_answer(tokenizer, model, prompt: str) -> str:
    """Run generation for ``prompt`` and return only the newly generated text.

    Args:
        tokenizer: Tokenizer returned by ``load_model``.
        model: Model returned by ``load_model``.
        prompt: Fully formatted prompt (see ``build_prompt``).

    Returns:
        The decoded continuation, without the prompt and without special
        tokens.
    """
    # Send the inputs to whichever device the model was placed on instead of
    # hard-coding "cuda".
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # NOTE(review): the whole document is placed in the prompt with no
    # truncation; long PDFs may exceed the model's context window - confirm.
    outputs = model.generate(
        **encoded,
        max_new_tokens=MAX_NEW_TOKENS,
        no_repeat_ngram_size=NO_REPEAT_NGRAM_SIZE,
    )
    # The output sequence starts with the prompt tokens; skip them so the
    # entire PDF text is not echoed back in front of the answer.
    prompt_length = encoded["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)


def main():
    """Render the Streamlit page and answer a query about an uploaded PDF."""
    # Streamlit app layout (page config is issued before any other
    # Streamlit command).
    st.set_page_config(page_title="π PDF Query App", page_icon=":book:", layout="wide")
    st.title("π PDF Query App")

    # Streamlit re-executes the script on every interaction; cache_resource
    # keeps a single tokenizer/model pair alive instead of reloading it.
    tokenizer, model = st.cache_resource(load_model)()

    st.sidebar.title("Upload File and Query")
    # Sidebar: File Upload
    uploaded_file = st.sidebar.file_uploader("Upload your PDF file", type="pdf")
    # Sidebar: Query Input
    query = st.sidebar.text_input("Enter your query:")

    # Sidebar: Submit Button
    if st.sidebar.button("Submit"):
        if uploaded_file and query:
            # Read the PDF file
            with BytesIO(uploaded_file.read()) as file:
                pdf_text = extract_pdf_text(PdfReader(file).pages)
            # Generate and display the answer
            prompt = build_prompt(pdf_text, query)
            st.write(generate_answer(tokenizer, model, prompt))
        else:
            st.sidebar.error("Please upload a PDF file and enter a query.")

    # Footer with LinkedIn link
    st.sidebar.write("---")
    st.sidebar.write(
        "Created by: [Engr. Hamesh Raj](https://www.linkedin.com/in/datascientisthameshraj/)"
    )


if __name__ == "__main__":
    main()