Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,30 +1,3 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from PyPDF2 import PdfReader
|
3 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
-
import torch
|
5 |
-
|
6 |
-
# Load the tokenizer and model.
# Both calls fetch "himmeow/vi-gemma-2b-RAG" by name when the module is run.
tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
model = AutoModelForCausalLM.from_pretrained(
    "himmeow/vi-gemma-2b-RAG",
    # device_map="auto" lets accelerate place the weights (on the GPU when one
    # is available), so no explicit model.to("cuda") follows: moving a
    # dispatched model by hand is redundant and fails if layers were offloaded.
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Define the prompt format for the model.
# The three positional "{}" placeholders are filled, in order, with:
#   1. the context/document text,
#   2. the user's question,
#   3. the response slot (callers pass a blank so the model completes it).
prompt = """
### Instruction and Input:
Based on the following context/document:
{}
Please answer the question: {}

### Response:
{}
"""
|
27 |
-
|
28 |
def extract_text_from_pdf(pdf):
|
29 |
pdf_Text = ""
|
30 |
reader = PdfReader(pdf)
|
@@ -32,30 +5,5 @@ def extract_text_from_pdf(pdf):
|
|
32 |
page = reader.pages[page_num]
|
33 |
text = page.extract_text()
|
34 |
pdf_Text += text + "\n"
|
|
|
35 |
return pdf_Text
|
36 |
-
|
37 |
-
def generate_response(pdf, query):
    """Answer ``query`` using the text of the uploaded PDF as context.

    Args:
        pdf: The uploaded PDF as delivered by the Gradio file input; it is
            passed unchanged to ``extract_text_from_pdf``.
        query: The question to ask about the document.

    Returns:
        The text generated by the model for this prompt. Only the newly
        generated tokens are decoded, so the prompt (which embeds the whole
        document) and special tokens are not echoed back.

    Raises:
        gr.Error: If no PDF was provided.
    """
    # Gradio passes None when the user submits without uploading a file;
    # fail with a readable message instead of an obscure reader error.
    if pdf is None:
        raise gr.Error("Please upload a PDF file.")

    pdf_Text = extract_text_from_pdf(pdf)
    input_text = prompt.format(pdf_Text, query, " ")

    # Put the inputs wherever the model actually lives rather than assuming
    # "cuda"; this also covers CPU-only hosts.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=500,  # Limit the number of tokens generated
        no_repeat_ngram_size=5,  # Prevent repetition of 5-gram phrases
    )

    # generate() returns prompt + continuation; slice off the prompt tokens
    # so only the answer is shown to the user.
    prompt_length = inputs["input_ids"].shape[1]
    answer_ids = outputs[0][prompt_length:]
    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()
|
51 |
-
|
52 |
-
# Gradio interface: a PDF upload and a question box go in, plain text comes out.
pdf_input = gr.File(label="Upload PDF")
question_input = gr.Textbox(label="Ask a question")

iface = gr.Interface(
    fn=generate_response,
    inputs=[pdf_input, question_input],
    outputs="text",
    title="PDF Question Answering with vi-gemma-2b-RAG",
    description=(
        "Upload a PDF and ask a question based on its content. "
        "The model will generate a response."
    ),
)

# Start the web app.
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
def extract_text_from_pdf(pdf):
|
2 |
pdf_Text = ""
|
3 |
reader = PdfReader(pdf)
|
|
|
5 |
page = reader.pages[page_num]
|
6 |
text = page.extract_text()
|
7 |
pdf_Text += text + "\n"
|
8 |
+
print("Extracted Text:\n", pdf_Text) # Add this line to debug
|
9 |
return pdf_Text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|