datascientist22 committed
Commit 72b04bc
1 Parent(s): c1c7f8f

Update app.py

Files changed (1):
  app.py +41 -23
app.py CHANGED
@@ -9,7 +9,7 @@ tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
 model = AutoModelForCausalLM.from_pretrained(
     "himmeow/vi-gemma-2b-RAG",
     device_map="auto",
-    torch_dtype=torch.float16  # Use FP16 for faster computation if supported
+    torch_dtype=torch.bfloat16
 )
 
 # Use GPU if available
@@ -34,33 +34,51 @@ if st.sidebar.button("Submit"):
         pdf_text = ""
         with BytesIO(uploaded_file.read()) as file:
             reader = PdfReader(file)
-            for page in reader.pages:
+            for page_num in range(len(reader.pages)):
+                page = reader.pages[page_num]
                 text = page.extract_text()
                 pdf_text += text + "\n"
 
-        # Define the prompt format for the model
-        prompt = f"""
-        {pdf_text}
-
-        Please answer the question: {query}
-
-        """
-
-        # Break the text into chunks if it's too long for the model
-        max_input_length = 2048  # Adjust based on the model's max length
-        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
-
-        # Use GPU for input ids if available
-        if torch.cuda.is_available():
-            input_ids = input_ids.to("cuda")
-
-        # Generate text using the model
-        outputs = model.generate(
-            **input_ids,
-            max_new_tokens=250,  # Reduce the number of tokens generated for faster results
-            no_repeat_ngram_size=3,  # Prevent repetition
-            num_beams=2,  # Use beam search with fewer beams for faster results
-        )
-
-        # Decode and display the results
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Chunk the text to fit within model limits
+        max_chunk_size = 2000  # Adjust as needed for your model's token limit
+        chunks = [pdf_text[i:i + max_chunk_size] for i in range(0, len(pdf_text), max_chunk_size)]
+
+        responses = []
+        for chunk in chunks:
+            prompt = f"""
+            {chunk}
+
+            Please answer the question: {query}
+
+            """
+
+            # Encode the input text into input ids
+            input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
+
+            # Use GPU for input ids if available
+            if torch.cuda.is_available():
+                input_ids = input_ids.to("cuda")
+
+            # Generate text using the model
+            outputs = model.generate(
+                **input_ids,
+                max_new_tokens=250,  # Reduce the number of tokens generated
+                no_repeat_ngram_size=3,  # Adjust for faster generation
+                num_beams=2,  # Use beam search with fewer beams for faster results
+            )
+
+            # Decode and store the response
+            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            responses.append(response)
+
+        # Combine responses and display them
+        combined_response = "\n".join(responses)
+        clean_response = combined_response.replace("### Instruction and Input:", "").replace("### Response:", "").strip()
+
+        st.write(clean_response)
+    else:
+        st.sidebar.error("Please upload a PDF file and enter a query.")
+
+# Footer with LinkedIn link
+st.sidebar.write("---")
+st.sidebar.write("Created by: [Engr. Hamesh Raj](https://www.linkedin.com/in/datascientisthameshraj/)")