Waseem771 committed
Commit 3907dec
1 Parent(s): 6d19487

Update app.py

Files changed (1): app.py (+70, -2)
app.py CHANGED
@@ -1,9 +1,77 @@
Previous version (removed lines are marked "-"):

 def extract_text_from_pdf(pdf):
     pdf_Text = ""
     reader = PdfReader(pdf)
     for page_num in range(len(reader.pages)):
         page = reader.pages[page_num]
         text = page.extract_text()
-        pdf_Text += text + "\n"
-        print("Extracted Text:\n", pdf_Text)  # Add this line to debug
     return pdf_Text
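The removed lines are the crux of the fix: page.extract_text() can come back empty for scanned or image-only pages, and the guard introduced below also protects against a None result, which would make text + "\n" raise a TypeError; the per-page print also spammed the log on every iteration. A minimal sketch of the guarded pattern the update adopts, using stand-in values in place of real extract_text() results:

pages = ["First page text", None, ""]  # stand-ins for page.extract_text() results

pdf_Text = ""
for text in pages:
    if text:                     # skips None and empty strings
        pdf_Text += text + "\n"  # without the guard, None would raise TypeError here

print(repr(pdf_Text))            # 'First page text\n'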
Updated version (added lines are marked "+"):

+import gradio as gr
+from PyPDF2 import PdfReader
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
+model = AutoModelForCausalLM.from_pretrained(
+    "himmeow/vi-gemma-2b-RAG",
+    device_map="auto",
+    torch_dtype=torch.bfloat16
+)
+
+if torch.cuda.is_available():
+    model.to("cuda")
+
+# Define the prompt format for the model
+prompt = """
+### Instruction and Input:
+Based on the following context/document:
+{}
+Please answer the question: {}
+
+### Response:
+{}
+"""
+
 def extract_text_from_pdf(pdf):
     pdf_Text = ""
     reader = PdfReader(pdf)
     for page_num in range(len(reader.pages)):
         page = reader.pages[page_num]
         text = page.extract_text()
+        if text:
+            pdf_Text += text + "\n"
+    if not pdf_Text.strip():
+        pdf_Text = "The PDF contains no extractable text."
+    print("Extracted Text:\n", pdf_Text)  # Debugging statement
     return pdf_Text
+
+def generate_response(pdf, query):
+    pdf_Text = extract_text_from_pdf(pdf)
+    if not pdf_Text.strip():
+        return "The PDF appears to be empty or unreadable."
+
+    input_text = prompt.format(pdf_Text, query, " ")
+    print("Input Text for Model:\n", input_text)  # Debugging statement
+
+    input_ids = tokenizer(input_text, return_tensors="pt")
+
+    if torch.cuda.is_available():
+        input_ids = input_ids.to("cuda")
+
+    try:
+        outputs = model.generate(
+            **input_ids,
+            max_new_tokens=500,
+            no_repeat_ngram_size=5,
+        )
+        response = tokenizer.decode(outputs[0])
+    except Exception as e:
+        response = "An error occurred while generating the response."
+        print("Error:", e)
+
+    print("Generated Response:\n", response)  # Debugging statement
+    return response
+
+# Gradio interface
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=[gr.File(label="Upload PDF"), gr.Textbox(label="Ask a question")],
+    outputs="text",
+    title="PDF Question Answering with vi-gemma-2b-RAG",
+    description="Upload a PDF and ask a question based on its content. The model will generate a response."
+)
+
+iface.launch()
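A quick way to exercise the pipeline without the web UI is to call generate_response directly. A minimal smoke-test sketch, assuming a local file "sample.pdf" (a hypothetical filename, not part of this Space), run in place of iface.launch():

# Hypothetical smoke test: drive the pipeline without the Gradio UI.
# "sample.pdf" is an assumed local file; PdfReader accepts a plain path.
if __name__ == "__main__":
    answer = generate_response("sample.pdf", "What is this document about?")
    print(answer)

Note that tokenizer.decode(outputs[0]) returns the full prompt together with the completion, special tokens included; decoding with skip_special_tokens=True and slicing off the prompt tokens is a common refinement when only the answer should be displayed.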