datascientist22 committed on
Commit c1c7f8f
1 Parent(s): 985d58e

Update app.py

Files changed (1)
  1. app.py +12 -26
app.py CHANGED
@@ -9,7 +9,7 @@ tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
 model = AutoModelForCausalLM.from_pretrained(
     "himmeow/vi-gemma-2b-RAG",
     device_map="auto",
-    torch_dtype=torch.bfloat16
+    torch_dtype=torch.float16 # Use FP16 for faster computation if supported
 )
 
 # Use GPU if available
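The dtype switch above trades bfloat16 for float16 to speed things up on common GPUs. If the target hardware is uncertain, the dtype could also be picked at runtime; the sketch below is only a suggestion, and the pick_dtype helper is an illustrative name, not part of the app:

import torch

def pick_dtype() -> torch.dtype:
    # Prefer bfloat16 where the GPU supports it, fall back to float16 on older
    # CUDA devices, and stay in float32 when only the CPU is available.
    if torch.cuda.is_available():
        return torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    return torch.float32

The result could then be passed as torch_dtype=pick_dtype() in the from_pretrained call shown above.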
@@ -34,25 +34,21 @@ if st.sidebar.button("Submit"):
         pdf_text = ""
         with BytesIO(uploaded_file.read()) as file:
             reader = PdfReader(file)
-            for page_num in range(len(reader.pages)):
-                page = reader.pages[page_num]
+            for page in reader.pages:
                 text = page.extract_text()
                 pdf_text += text + "\n"
 
         # Define the prompt format for the model
-        prompt = """
-        {}
+        prompt = f"""
+        {pdf_text}
 
-        Please answer the question: {}
+        Please answer the question: {query}
 
-        {}
         """
 
-        # Format the input text
-        input_text = prompt.format(pdf_text, query, " ")
-
-        # Encode the input text into input ids
-        input_ids = tokenizer(input_text, return_tensors="pt")
+        # Break the text into chunks if it's too long for the model
+        max_input_length = 2048 # Adjust based on the model's max length
+        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
 
         # Use GPU for input ids if available
         if torch.cuda.is_available():
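The new comment talks about breaking the text into chunks, but the code as shown only truncates the prompt at max_input_length, so anything in the PDF beyond roughly 2048 tokens is silently dropped. A minimal sketch of actual chunking, assuming the same tokenizer object; chunk_text, chunk_size, and overlap are illustrative names:

def chunk_text(text, tokenizer, chunk_size=1500, overlap=100):
    # Tokenize once, slice the ids into overlapping windows, and decode each
    # window back to text so no part of the PDF is discarded outright.
    ids = tokenizer(text, add_special_tokens=False)["input_ids"]
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(ids), step):
        chunks.append(tokenizer.decode(ids[start:start + chunk_size]))
        if start + chunk_size >= len(ids):
            break
    return chunks

Each chunk could then be formatted into the prompt and answered in turn, or scored against the query first so only the most relevant chunk is sent to the model.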
@@ -61,20 +57,10 @@ if st.sidebar.button("Submit"):
         # Generate text using the model
         outputs = model.generate(
             **input_ids,
-            max_new_tokens=500, # Limit the number of tokens generated
-            no_repeat_ngram_size=5, # Prevent repetition of 5-gram phrases
+            max_new_tokens=250, # Reduce the number of tokens generated for faster results
+            no_repeat_ngram_size=3, # Prevent repetition
+            num_beams=2, # Use beam search with fewer beams for faster results
         )
 
         # Decode and display the results
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Remove unwanted text fields from the response
-        clean_response = response.replace("### Instruction and Input:", "").replace("### Response:", "").strip()
-
-        st.write(clean_response)
-    else:
-        st.sidebar.error("Please upload a PDF file and enter a query.")
-
-# Footer with LinkedIn link
-st.sidebar.write("---")
-st.sidebar.write("Created by: [Engr. Hamesh Raj](https://www.linkedin.com/in/datascientisthameshraj/)")
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
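Worth noting: the removed lines in this hunk include the response cleanup, the st.write call, the else branch with its error message, and the sidebar footer, and the +12/-26 stats suggest nothing in this commit reintroduces them, so the decoded answer is no longer rendered in the UI. Restoring the display step would look roughly like the removed code:

# Strip the prompt-template markers and render the answer in the app
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
clean_response = response.replace("### Instruction and Input:", "").replace("### Response:", "").strip()
st.write(clean_response)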
 
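For completeness, a small sketch of how the updated generation settings fit together once the inputs are on the model's device; wrapping generate in torch.inference_mode() is an extra suggestion for speed, not something this commit adds:

import torch

# Move the tokenized inputs to the same device as the model (GPU if available)
input_ids = input_ids.to(model.device)

with torch.inference_mode():  # autograd state is not needed for generation
    outputs = model.generate(
        **input_ids,
        max_new_tokens=250,
        no_repeat_ngram_size=3,
        num_beams=2,
    )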