Waseem771 committed on
Commit
6d19487
1 Parent(s): 534a594

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -53
app.py CHANGED
@@ -1,30 +1,3 @@
1
- import gradio as gr
2
- from PyPDF2 import PdfReader
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import torch
5
-
6
- # Load the tokenizer and model
7
- tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
8
- model = AutoModelForCausalLM.from_pretrained(
9
- "himmeow/vi-gemma-2b-RAG",
10
- device_map="auto",
11
- torch_dtype=torch.bfloat16
12
- )
13
-
14
- if torch.cuda.is_available():
15
- model.to("cuda")
16
-
17
- # Define the prompt format for the model
18
- prompt = """
19
- ### Instruction and Input:
20
- Based on the following context/document:
21
- {}
22
- Please answer the question: {}
23
-
24
- ### Response:
25
- {}
26
- """
27
-
28
  def extract_text_from_pdf(pdf):
29
  pdf_Text = ""
30
  reader = PdfReader(pdf)
@@ -32,30 +5,5 @@ def extract_text_from_pdf(pdf):
32
  page = reader.pages[page_num]
33
  text = page.extract_text()
34
  pdf_Text += text + "\n"
 
35
  return pdf_Text
36
-
37
- def generate_response(pdf, query):
38
- pdf_Text = extract_text_from_pdf(pdf)
39
- input_text = prompt.format(pdf_Text, query, " ")
40
- input_ids = tokenizer(input_text, return_tensors="pt")
41
-
42
- if torch.cuda.is_available():
43
- input_ids = input_ids.to("cuda")
44
-
45
- outputs = model.generate(
46
- **input_ids,
47
- max_new_tokens=500, # Limit the number of tokens generated
48
- no_repeat_ngram_size=5, # Prevent repetition of 5-gram phrases
49
- )
50
- return tokenizer.decode(outputs[0])
51
-
52
- # Gradio interface
53
- iface = gr.Interface(
54
- fn=generate_response,
55
- inputs=[gr.File(label="Upload PDF"), gr.Textbox(label="Ask a question")],
56
- outputs="text",
57
- title="PDF Question Answering with vi-gemma-2b-RAG",
58
- description="Upload a PDF and ask a question based on its content. The model will generate a response."
59
- )
60
-
61
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def extract_text_from_pdf(pdf):
2
  pdf_Text = ""
3
  reader = PdfReader(pdf)
 
5
  page = reader.pages[page_num]
6
  text = page.extract_text()
7
  pdf_Text += text + "\n"
8
+ print("Extracted Text:\n", pdf_Text) # Add this line to debug
9
  return pdf_Text