Waseem7711 committed • Commit d39f2ea
Parent(s): df61c4d
Update app.py
app.py CHANGED
@@ -24,7 +24,7 @@ def extract_text_from_pdf(pdf_file):
     text = ""
     for page_num in range(doc.page_count):
         page = doc.load_page(page_num)
-        text += page.get_text()
+        text += page.get_text("text")  # Ensure text extraction
     return text
 
 # Function to generate response from model
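For context, this hunk shows only the loop body. A minimal sketch of how the surrounding function plausibly looks, assuming PyMuPDF (imported as fitz) and that pdf_file arrives as an uploaded file object; the stream-based open is an assumption, since the diff never shows how doc is created:

import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_file):
    # Assumption: pdf_file is an uploaded file object; fitz can open raw PDF bytes
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = ""
    for page_num in range(doc.page_count):
        page = doc.load_page(page_num)
        # "text" explicitly selects plain-text extraction (also PyMuPDF's default mode)
        text += page.get_text("text")
    return text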
@@ -35,18 +35,19 @@ def generate_response(input_text, query, tokenizer, model):
     Based on the following context/document:
     {input_text}
     Please answer the question: {query}
-
     ### Response:
     """
-    input_ids = tokenizer(prompt, return_tensors="pt")
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
     if torch.cuda.is_available():
         input_ids = input_ids.to("cuda")
+
     # Generate a response from the model
     outputs = model.generate(
-
+        input_ids=input_ids,
     max_new_tokens=500,
         no_repeat_ngram_size=5
     )
+
     # Decode the generated output into readable text
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
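The two + lines are the substantive fix: tokenizer(prompt, return_tensors="pt") returns a BatchEncoding (a dict-like container), not a tensor, and the old generate() call never received the prompt tokens at all. A minimal sketch of the corrected flow, wrapped in a hypothetical helper (answer is not a name from app.py), assuming a Hugging Face causal LM already loaded as tokenizer and model:

import torch

def answer(prompt, tokenizer, model):
    # tokenizer(...) returns a BatchEncoding; .input_ids extracts the token tensor
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    if torch.cuda.is_available():
        input_ids = input_ids.to("cuda")  # assumes the model itself is on the GPU

    # generate() needs the input tensor explicitly; the old code omitted it
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=500,      # cap on newly generated tokens
        no_repeat_ngram_size=5,  # block verbatim 5-gram repetition
    )
    # outputs[0] holds the prompt tokens followed by the generated continuation
    return tokenizer.decode(outputs[0], skip_special_tokens=True)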
@@ -75,8 +76,11 @@ def main():
     # Load the model and tokenizer
     tokenizer, model = load_model()
     # Generate the response using the model
-
-
+    try:
+        response = generate_response(pdf_text, query, tokenizer, model)
+        st.text_area("Response", response, height=200)
+    except Exception as e:
+        st.error(f"Error generating response: {e}")
 
 if __name__ == "__main__":
     main()
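The added try/except assumes pdf_text and query already exist earlier in main(). A sketch of how the pieces plausibly fit together; the title and input widgets below are assumptions (the diff shows none of them), and the helper functions are the ones defined earlier in app.py:

import streamlit as st

def main():
    st.title("PDF Q&A")  # hypothetical title, not shown in the diff
    pdf_file = st.file_uploader("Upload a PDF", type="pdf")     # assumed widget
    query = st.text_input("Ask a question about the document")  # assumed widget

    if pdf_file is not None and query:
        pdf_text = extract_text_from_pdf(pdf_file)
        # Load the model and tokenizer
        tokenizer, model = load_model()
        # Generate the response using the model
        try:
            response = generate_response(pdf_text, query, tokenizer, model)
            st.text_area("Response", response, height=200)
        except Exception as e:
            st.error(f"Error generating response: {e}")

if __name__ == "__main__":
    main()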