Waseem771 committed on
Commit
c837340
1 Parent(s): 90f8861

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PyPDF2 import PdfReader
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import torch
5
+
6
# Load the tokenizer and model once at import time; every request reuses them.
MODEL_ID = "himmeow/vi-gemma-2b-RAG"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# device_map="auto" already places the weights on the available device(s),
# so the model is deliberately NOT moved again with model.to("cuda"):
# relocating a dispatched model is redundant at best and can fail when some
# modules were offloaded.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
16
+
17
# Prompt template expected by the model. The three positional slots are, in
# order: the context document, the user's question, and the response stub.
prompt = (
    "\n"
    "### Instruction and Input:\n"
    "Based on the following context/document:\n"
    "{}\n"
    "Please answer the question: {}\n"
    "\n"
    "### Response:\n"
    "{}\n"
)
27
+
28
def extract_text_from_pdf(pdf):
    """Return the text of every page in ``pdf``, one page per line group.

    Args:
        pdf: Whatever ``PdfReader`` accepts as its source; the value is
            passed through unchanged.

    Returns:
        The extracted text of all pages concatenated in page order, with a
        newline appended after each page. A PDF with no pages yields ``""``.
    """
    reader = PdfReader(pdf)
    # ``or ""`` guards against pages for which no text is returned, so the
    # concatenation never fails on a non-string value.
    return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)
36
+
37
def generate_response(pdf, query):
    """Answer ``query`` using the text of the uploaded PDF as context.

    Args:
        pdf: The uploaded PDF as delivered by the file input; forwarded
            unchanged to ``extract_text_from_pdf``. ``None`` means nothing
            was uploaded.
        query: The user's question about the document.

    Returns:
        The generated answer as plain text, without the prompt and without
        special tokens, or a short notice when no PDF was provided.
    """
    if pdf is None:
        return "Please upload a PDF file."

    pdf_text = extract_text_from_pdf(pdf)
    # The single space fills the template's response slot, leaving it for
    # the model to complete.
    input_text = prompt.format(pdf_text, query, " ")

    # Follow the model's own placement instead of hard-coding "cuda", so the
    # tensors land wherever the weights were loaded.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=500,  # Limit the number of tokens generated
        no_repeat_ngram_size=5,  # Prevent repetition of 5-gram phrases
    )

    # generate() returns the prompt tokens followed by the continuation;
    # drop the prompt so the (potentially very long) PDF text is not echoed
    # back to the user.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
51
+
52
# Gradio interface.
# Components are taken from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` module is deprecated and no longer exists in current Gradio
# releases, where referencing it raises AttributeError at import time.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Ask a question"),
    ],
    outputs="text",
    title="PDF Question Answering with vi-gemma-2b-RAG",
    description="Upload a PDF and ask a question based on its content. The model will generate a response.",
)

iface.launch()