capradeepgujaran committed on
Commit
9d47d09
β€’
1 Parent(s): c08083d

Update app.py

Files changed (1)
  1. app.py +119 -96
app.py CHANGED
@@ -10,23 +10,31 @@ from llama_index.core import VectorStoreIndex, Document
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI
 from llama_index.core import get_response_synthesizer
- from dotenv import load_dotenv
 from sentence_transformers import SentenceTransformer, util
 import logging
- from openai_tts_tool import generate_audio_and_text  # Importing the TTS tool
+ from openai_tts_tool import generate_audio_and_text
+ import tempfile

 # Set up logging configuration
 logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')

- # Load environment variables from .env file
- load_dotenv()
-
 # Initialize global variables
 vector_index = None
 query_log = []
 sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

- langs = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
+ # Get available languages for OCR
+ try:
+     langs = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
+ except:
+     langs = ['eng']  # Fallback to English if tesseract isn't properly configured
+
+ def create_temp_dir():
+     """Create temporary directory if it doesn't exist"""
+     temp_dir = os.path.join(os.getcwd(), 'temp')
+     if not os.path.exists(temp_dir):
+         os.makedirs(temp_dir)
+     return temp_dir

 def preprocess_image(image_path):
     img = cv2.imread(image_path)
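
A side note on this hunk: it adds "import tempfile", yet create_temp_dir() still builds a ./temp folder in the working directory by hand. A minimal sketch of a helper that would actually use the imported module (the doc_app_ prefix is illustrative, not from the commit):

    import os
    import tempfile

    # one scratch directory per process, created under the system temp root
    # instead of the current working directory
    _TEMP_DIR = tempfile.mkdtemp(prefix="doc_app_")

    def create_temp_dir():
        """Return the process-wide temporary directory."""
        return _TEMP_DIR

Recent pytesseract releases also expose pytesseract.get_languages(config=''), which could replace the os.popen('tesseract --list-langs') parsing, though the shell-out works wherever the tesseract binary is on the PATH.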
@@ -34,18 +42,24 @@ def preprocess_image(image_path):
     gray = cv2.equalizeHist(gray)
     gray = cv2.GaussianBlur(gray, (5, 5), 0)
     processed_image = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                             cv2.THRESH_BINARY, 11, 2)
-     temp_filename = "processed_image.png"
+                                             cv2.THRESH_BINARY, 11, 2)
+     temp_dir = create_temp_dir()
+     temp_filename = os.path.join(temp_dir, "processed_image.png")
     cv2.imwrite(temp_filename, processed_image)
     return temp_filename

 def extract_text_from_image(image_path, lang='eng'):
     processed_image_path = preprocess_image(image_path)
     text = pytesseract.image_to_string(Image.open(processed_image_path), lang=lang)
+     try:
+         os.remove(processed_image_path)
+     except:
+         pass
     return text

 def extract_text_from_pdf(pdf_path, lang='eng'):
     text = ""
+     temp_dir = create_temp_dir()
     try:
         with open(pdf_path, 'rb') as file:
             pdf_reader = PyPDF2.PdfReader(file)
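
The new cleanup in extract_text_from_image only runs when OCR succeeds; if pytesseract raises, the processed image is left behind. A sketch of the same function with the removal in a finally block, using the commit's own names:

    def extract_text_from_image(image_path, lang='eng'):
        processed_image_path = preprocess_image(image_path)
        try:
            return pytesseract.image_to_string(Image.open(processed_image_path), lang=lang)
        finally:
            # delete the intermediate file whether or not OCR succeeded
            try:
                os.remove(processed_image_path)
            except OSError:
                pass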
@@ -57,9 +71,14 @@ def extract_text_from_pdf(pdf_path, lang='eng'):
             else:
                 images = convert_from_path(pdf_path, first_page=page_num + 1, last_page=page_num + 1)
                 for image in images:
-                     image.save('temp_image.png', 'PNG')
-                     text += extract_text_from_image('temp_image.png', lang=lang)
+                     temp_image_path = os.path.join(temp_dir, f'temp_image_{page_num}.png')
+                     image.save(temp_image_path, 'PNG')
+                     text += extract_text_from_image(temp_image_path, lang=lang)
                     text += f"\n[OCR applied on page {page_num + 1}]\n"
+                     try:
+                         os.remove(temp_image_path)
+                     except:
+                         pass
     except Exception as e:
         return f"Error processing PDF: {str(e)}"
     return text
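
For reference, a hedged usage sketch of this OCR fallback path (sample.pdf is a placeholder; pytesseract needs the tesseract binary installed, and pdf2image needs poppler):

    # pages with a text layer are read via PyPDF2; pages without one are
    # rasterized by convert_from_path and OCR'd, as the hunk above shows
    text = extract_text_from_pdf("sample.pdf", lang="eng")
    print(text)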
@@ -77,10 +96,10 @@ def process_upload(api_key, files, lang):
     global vector_index

     if not api_key:
-         return "Please provide a valid OpenAI API Key.", None
+         return "Please provide a valid OpenAI API Key."

     if not files:
-         return "No files uploaded.", None
+         return "No files uploaded."

     documents = []
     error_messages = []
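
Dropping the trailing ", None" matches the rewiring later in this commit: Gradio maps one returned value to each component in outputs, and upload_button.click now lists only [upload_status], so the old two-tuple return would no longer fit. Schematically:

    # one returned value per component in outputs=[...]
    upload_button.click(
        fn=process_upload,  # now returns a single status string
        inputs=[api_key_input, file_upload, lang_dropdown],
        outputs=[upload_status]  # single output component
    )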
@@ -93,7 +112,7 @@ def process_upload(api_key, files, lang):
             image_heavy_docs.append(os.path.basename(file_path))
             documents.append(Document(text=text))
         except Exception as e:
-             error_message = f"Error processing file {file_path}: {str(e)}"
+             error_message = f"Error processing file {os.path.basename(file_path)}: {str(e)}"
             logging.error(error_message)
             error_messages.append(error_message)

@@ -108,118 +127,122 @@ def process_upload(api_key, files, lang):
         if error_messages:
             success_message += f"\nErrors: {'; '.join(error_messages)}"

-         return success_message, vector_index
+         return success_message
     except Exception as e:
-         return f"Error creating index: {str(e)}", None
+         return f"Error creating index: {str(e)}"
     else:
-         return f"No valid documents were indexed. Errors: {'; '.join(error_messages)}", None
-
- def calculate_similarity(response, ground_truth):
-     response_embedding = sentence_model.encode(response, convert_to_tensor=True)
-     truth_embedding = sentence_model.encode(ground_truth, convert_to_tensor=True)
-
-     response_embedding = response_embedding / np.linalg.norm(response_embedding)
-     truth_embedding = truth_embedding / np.linalg.norm(truth_embedding)
-
-     similarity = np.dot(response_embedding, truth_embedding)
-     similarity_percentage = (similarity + 1) / 2 * 100
-
-     return similarity_percentage
+         return f"No valid documents were indexed. Errors: {'; '.join(error_messages)}"

- def query_app(query, model_name, use_similarity_check, openai_api_key, answer_state):
+ def query_app(query, model_name, use_similarity_check, api_key):
     global vector_index, query_log

     if vector_index is None:
-         logging.error("No documents indexed yet. Please upload documents first.")
-         return "No documents indexed yet. Please upload documents first.", None
+         return "No documents indexed yet. Please upload documents first."

-     if not openai_api_key:
-         logging.error("No OpenAI API Key provided.")
-         return "Please provide a valid OpenAI API Key.", None
-
-     try:
-         llm = OpenAI(model=model_name, api_key=openai_api_key)
-     except Exception as e:
-         logging.error(f"Error initializing the OpenAI model: {e}")
-         return f"Error initializing the OpenAI model: {e}", None
-
-     response_synthesizer = get_response_synthesizer(llm=llm)
-     query_engine = vector_index.as_query_engine(llm=llm, response_synthesizer=response_synthesizer)
+     if not api_key:
+         return "Please provide a valid OpenAI API Key."

     try:
+         llm = OpenAI(model=model_name, api_key=api_key)
+         response_synthesizer = get_response_synthesizer(llm=llm)
+         query_engine = vector_index.as_query_engine(llm=llm, response_synthesizer=response_synthesizer)
         response = query_engine.query(query)
+
+         generated_response = response.response
+         return generated_response
+
     except Exception as e:
         logging.error(f"Error during query processing: {e}")
-         return f"Error during query processing: {e}", None
+         return f"Error during query processing: {str(e)}"

-     generated_response = response.response
-     query_log.append({
-         "query_id": str(len(query_log) + 1),
-         "query": query,
-         "gt_answer": "Placeholder ground truth answer",
-         "response": generated_response,
-         "retrieved_context": [{"text": doc.text} for doc in response.source_nodes]
-     })
-
-     metrics = {}
-
-     if use_similarity_check:
-         try:
-             logging.info("Similarity check is enabled. Calculating similarity.")
-             similarity = calculate_similarity(generated_response, "Placeholder ground truth answer")
-             metrics['similarity'] = similarity
-             logging.info(f"Similarity calculated: {similarity}")
-         except Exception as e:
-             logging.error(f"Error during similarity calculation: {e}")
-             metrics['error'] = f"Error during similarity calculation: {e}"
-
-     if answer_state is None:
-         answer_state = gr.State()
-
-     answer_state.value = generated_response
-
-     return generated_response, metrics if use_similarity_check else None
-
- def process_tts(api_key, text, model_name, voice_type, voice_speed, language, output_option, summary_length, additional_prompt):
-     return generate_audio_and_text(api_key, text, model_name, voice_type, voice_speed, language, output_option, summary_length, additional_prompt)
-
- def main():
+ def create_gradio_interface():
     with gr.Blocks(title="Document Processing and TTS App") as demo:
         gr.Markdown("# 📄 Document Processing, Text & Audio Generation App")
-
-         # Upload documents and chat functionality
+
         with gr.Tab("📤 Upload Documents"):
-             api_key_input = gr.Textbox(label="Enter OpenAI API Key", placeholder="Paste your OpenAI API Key here")
+             api_key_input = gr.Textbox(
+                 label="Enter OpenAI API Key",
+                 placeholder="Paste your OpenAI API Key here",
+                 type="password"
+             )
             file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
             lang_dropdown = gr.Dropdown(choices=langs, label="Select OCR Language", value='eng')
             upload_button = gr.Button("Upload and Index")
             upload_status = gr.Textbox(label="Status", interactive=False)
-             upload_button.click(fn=process_upload, inputs=[api_key_input, file_upload, lang_dropdown], outputs=[upload_status])

-         answer_state = gr.State()  # Initialize answer state here
         with gr.Tab("❓ Ask a Question"):
             query_input = gr.Textbox(label="Enter your question")
-             model_dropdown = gr.Dropdown(choices=["gpt-4o", "gpt-4o-mini"], label="Select Model", value="gpt-4o")
+             model_dropdown = gr.Dropdown(
+                 choices=["gpt-4-0125-preview", "gpt-3.5-turbo-0125"],
+                 label="Select Model",
+                 value="gpt-3.5-turbo-0125"
+             )
             similarity_checkbox = gr.Checkbox(label="Use Similarity Check", value=False)
             query_button = gr.Button("Ask")
             answer_output = gr.Textbox(label="Answer", interactive=False)
-             metrics_output = gr.JSON(label="Metrics")
-             query_button.click(fn=query_app, inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input, answer_state], outputs=[answer_output, metrics_output])

         with gr.Tab("🗣️ Generate Audio and Text"):
-             text_input = gr.Textbox(label="Enter text for generation", value=answer_state)
-             voice_type = gr.Dropdown(choices=["alloy", "echo", "fable", "onyx"], label="Voice Type", value="alloy")
-             voice_speed = gr.Dropdown(choices=["normal", "slow", "fast"], label="Voice Speed", value="normal")
-             language = gr.Dropdown(choices=["en", "ar", "de", "hi"], label="Language", value="en")
-             output_option = gr.Radio(choices=["audio", "summary_text", "both"], label="Output Option", value="both")
-             summary_length = gr.Number(label="Summary Length", value=100)
+             text_input = gr.Textbox(label="Enter text for generation")
+             voice_type = gr.Dropdown(
+                 choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
+                 label="Voice Type",
+                 value="alloy"
+             )
+             voice_speed = gr.Slider(
+                 minimum=0.25,
+                 maximum=4.0,
+                 value=1.0,
+                 label="Voice Speed"
+             )
+             language = gr.Dropdown(
+                 choices=["en", "ar", "de", "hi", "es", "fr", "it", "ja", "ko", "pt"],
+                 label="Language",
+                 value="en"
+             )
+             output_option = gr.Radio(
+                 choices=["audio", "summary_text", "both"],
+                 label="Output Option",
+                 value="both"
+             )
+             summary_length = gr.Slider(
+                 minimum=50,
+                 maximum=500,
+                 value=100,
+                 step=10,
+                 label="Summary Length (words)"
+             )
             additional_prompt = gr.Textbox(label="Additional Prompt (Optional)")
             generate_button = gr.Button("Generate")
-             audio_output = gr.Audio(label="Generated Audio", interactive=False)
-             summary_output = gr.Textbox(label="Generated Summary Text", interactive=False)
-             generate_button.click(fn=process_tts, inputs=[api_key_input, text_input, model_dropdown, voice_type, voice_speed, language, output_option, summary_length, additional_prompt], outputs=[audio_output, summary_output])
-
-     demo.launch()
+             audio_output = gr.Audio(label="Generated Audio")
+             summary_output = gr.Textbox(label="Generated Summary Text")
+
+         # Wire up the components
+         upload_button.click(
+             fn=process_upload,
+             inputs=[api_key_input, file_upload, lang_dropdown],
+             outputs=[upload_status]
+         )
+
+         query_button.click(
+             fn=query_app,
+             inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
+             outputs=[answer_output]
+         )
+
+         generate_button.click(
+             fn=generate_audio_and_text,
+             inputs=[
+                 api_key_input, text_input, model_dropdown, voice_type,
+                 voice_speed, language, output_option, summary_length,
+                 additional_prompt
+             ],
+             outputs=[audio_output, summary_output]
+         )
+
+     return demo

 if __name__ == "__main__":
-     main()
+     demo = create_gradio_interface()
+     demo.launch()
+ else:
+     demo = create_gradio_interface()
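
One loose end in this hunk: query_app still accepts use_similarity_check (and the checkbox still feeds it through query_button.click), but the parameter is now ignored because calculate_similarity and the metrics output were deleted, which also leaves the module-level sentence_model loaded but unused. If the check were restored, a sketch along the deleted function's lines (cosine similarity mapped to a 0-100 score) could be:

    from sentence_transformers import util

    def calculate_similarity(response, ground_truth):
        # embed both texts with the module-level sentence_model
        response_emb = sentence_model.encode(response, convert_to_tensor=True)
        truth_emb = sentence_model.encode(ground_truth, convert_to_tensor=True)
        cosine = util.cos_sim(response_emb, truth_emb).item()  # in [-1, 1]
        return (cosine + 1) / 2 * 100  # map to a 0-100 percentage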
 
 
 
 
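
The closing if/else is what makes the rewrite deployment-friendly: running python app.py launches the server directly, while an importing host gets a module-level demo without launching it (Gradio-based Hugging Face Spaces conventionally import app.py and look for a variable named demo). A minimal sketch of the import path, assuming the file is importable as app:

    import app

    # the host, not the module, decides when and how to serve the interface
    app.demo.launch(server_name="0.0.0.0", server_port=7860)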