capradeepgujaran committed
Commit 601f904
1 Parent(s): ebe8df2

Update app.py

Files changed (1):
    app.py +28 -128

app.py CHANGED
@@ -23,137 +23,31 @@ vector_index = None
 query_log = []
 sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
 
+# Define available languages for TTS
+AVAILABLE_LANGUAGES = [
+    ("en", "English"),
+    ("ar", "Arabic"),
+    ("de", "German"),
+    ("mr", "Marathi"),
+    ("kn", "Kannada"),
+    ("tl", "Filipino (Tagalog)"),
+    ("fr", "French"),
+    ("gu", "Gujarati"),
+    ("hi", "Hindi"),
+    ("ml", "Malayalam"),
+    ("ta", "Tamil"),
+    ("te", "Telugu"),
+    ("ur", "Urdu"),
+    ("si", "Sinhala")
+]
+
 # Get available languages for OCR
 try:
     langs = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
 except:
     langs = ['eng']  # Fallback to English if tesseract isn't properly configured
 
-def create_temp_dir():
-    """Create temporary directory if it doesn't exist"""
-    temp_dir = os.path.join(os.getcwd(), 'temp')
-    if not os.path.exists(temp_dir):
-        os.makedirs(temp_dir)
-    return temp_dir
-
-def preprocess_image(image_path):
-    img = cv2.imread(image_path)
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    gray = cv2.equalizeHist(gray)
-    gray = cv2.GaussianBlur(gray, (5, 5), 0)
-    processed_image = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                            cv2.THRESH_BINARY, 11, 2)
-    temp_dir = create_temp_dir()
-    temp_filename = os.path.join(temp_dir, "processed_image.png")
-    cv2.imwrite(temp_filename, processed_image)
-    return temp_filename
-
-def extract_text_from_image(image_path, lang='eng'):
-    processed_image_path = preprocess_image(image_path)
-    text = pytesseract.image_to_string(Image.open(processed_image_path), lang=lang)
-    try:
-        os.remove(processed_image_path)
-    except:
-        pass
-    return text
-
-def extract_text_from_pdf(pdf_path, lang='eng'):
-    text = ""
-    temp_dir = create_temp_dir()
-    try:
-        with open(pdf_path, 'rb') as file:
-            pdf_reader = PyPDF2.PdfReader(file)
-            for page_num in range(len(pdf_reader.pages)):
-                page = pdf_reader.pages[page_num]
-                page_text = page.extract_text()
-                if page_text.strip():
-                    text += page_text
-                else:
-                    images = convert_from_path(pdf_path, first_page=page_num + 1, last_page=page_num + 1)
-                    for image in images:
-                        temp_image_path = os.path.join(temp_dir, f'temp_image_{page_num}.png')
-                        image.save(temp_image_path, 'PNG')
-                        text += extract_text_from_image(temp_image_path, lang=lang)
-                        text += f"\n[OCR applied on page {page_num + 1}]\n"
-                        try:
-                            os.remove(temp_image_path)
-                        except:
-                            pass
-    except Exception as e:
-        return f"Error processing PDF: {str(e)}"
-    return text
-
-def extract_text(file_path, lang='eng'):
-    file_ext = file_path.lower().split('.')[-1]
-    if file_ext in ['pdf']:
-        return extract_text_from_pdf(file_path, lang)
-    elif file_ext in ['png', 'jpg', 'jpeg']:
-        return extract_text_from_image(file_path, lang)
-    else:
-        return f"Unsupported file type: {file_ext}"
-
-def process_upload(api_key, files, lang):
-    global vector_index
-
-    if not api_key:
-        return "Please provide a valid OpenAI API Key."
-
-    if not files:
-        return "No files uploaded."
-
-    documents = []
-    error_messages = []
-    image_heavy_docs = []
-
-    for file_path in files:
-        try:
-            text = extract_text(file_path, lang)
-            if "This document consists of" in text and "page(s) of images" in text:
-                image_heavy_docs.append(os.path.basename(file_path))
-            documents.append(Document(text=text))
-        except Exception as e:
-            error_message = f"Error processing file {os.path.basename(file_path)}: {str(e)}"
-            logging.error(error_message)
-            error_messages.append(error_message)
-
-    if documents:
-        try:
-            embed_model = OpenAIEmbedding(model="text-embedding-3-large", api_key=api_key)
-            vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
-
-            success_message = f"Successfully indexed {len(documents)} files."
-            if image_heavy_docs:
-                success_message += f"\nNote: The following documents consist mainly of images and may require manual review: {', '.join(image_heavy_docs)}"
-            if error_messages:
-                success_message += f"\nErrors: {'; '.join(error_messages)}"
-
-            return success_message
-        except Exception as e:
-            return f"Error creating index: {str(e)}"
-    else:
-        return f"No valid documents were indexed. Errors: {'; '.join(error_messages)}"
-
-def query_app(query, model_name, use_similarity_check, api_key):
-    global vector_index, query_log
-
-    if vector_index is None:
-        return "No documents indexed yet. Please upload documents first."
-
-    if not api_key:
-        return "Please provide a valid OpenAI API Key."
-
-    try:
-        llm = OpenAI(model=model_name, api_key=api_key)
-        response_synthesizer = get_response_synthesizer(llm=llm)
-        query_engine = vector_index.as_query_engine(llm=llm, response_synthesizer=response_synthesizer)
-        response = query_engine.query(query)
-
-        generated_response = response.response
-        return generated_response
-
-    except Exception as e:
-        logging.error(f"Error during query processing: {e}")
-        return f"Error during query processing: {str(e)}"
+# ... (keep all the existing functions until create_gradio_interface unchanged) ...
 
 def create_gradio_interface():
     with gr.Blocks(title="Document Processing and TTS App") as demo:
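
A side note on the `tesseract --list-langs` probe in the context lines above: `os.popen` returns a pipe and does not raise when the binary is absent, so the bare `except:` never fires and `langs` silently becomes an empty list. A more defensive sketch, assuming a recent pytesseract that ships the `get_languages` helper:

import pytesseract

# get_languages shells out to `tesseract --list-langs` itself and raises
# TesseractNotFoundError when the binary is missing
try:
    langs = pytesseract.get_languages(config='') or ['eng']
except pytesseract.TesseractNotFoundError:
    langs = ['eng']  # fall back to English when tesseract isn't installed
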
@@ -173,9 +67,9 @@ def create_gradio_interface():
         with gr.Tab("❓ Ask a Question"):
             query_input = gr.Textbox(label="Enter your question")
             model_dropdown = gr.Dropdown(
-                choices=["gpt-4o-mini", "gpt-4o"],
+                choices=["gpt-4-0125-preview", "gpt-3.5-turbo-0125"],
                 label="Select Model",
-                value="gpt-4o-mini"
+                value="gpt-3.5-turbo-0125"
             )
             similarity_checkbox = gr.Checkbox(label="Use Similarity Check", value=False)
             query_button = gr.Button("Ask")
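
The dropdown's value is passed through as `model_name` and lands directly in the LLM constructor (see `query_app` in the removed block above), so each choice must be an identifier the OpenAI API accepts. A minimal sketch of that flow, assuming llama-index 0.10+ import paths (not shown in this diff):

# Hypothetical standalone snippet; `api_key` comes from the UI textbox
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-3.5-turbo-0125", api_key=api_key)
print(llm.complete("ping").text)
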
@@ -194,6 +88,12 @@ def create_gradio_interface():
                 value=1.0,
                 label="Voice Speed"
             )
+            language = gr.Dropdown(
+                choices=[(code, name) for code, name in AVAILABLE_LANGUAGES],
+                label="Language for Audio and Script",
+                value="en",
+                type="value"
+            )
             output_option = gr.Radio(
                 choices=["audio", "script_text", "both"],
                 label="Output Option",
@@ -227,7 +127,7 @@ def create_gradio_interface():
             fn=generate_audio_and_text,
             inputs=[
                 api_key_input, text_input, model_dropdown, voice_type,
-                voice_speed, output_option
+                voice_speed, language, output_option
             ],
             outputs=[audio_output, script_output, status_output]
         )
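
`generate_audio_and_text` itself is not shown in this diff; for orientation, a hypothetical sketch of a handler whose signature matches the new inputs list, with gTTS standing in for whatever TTS backend the app actually uses:

from gtts import gTTS  # illustrative backend only; not confirmed by this diff

def generate_audio_and_text(api_key, text, model_name, voice_type,
                            voice_speed, language, output_option):
    # `language` arrives as the dropdown value, e.g. "en" or "hi"
    tts = gTTS(text=text, lang=language, slow=(voice_speed < 1.0))
    audio_path = "output.mp3"
    tts.save(audio_path)
    # return values line up with [audio_output, script_output, status_output]
    return audio_path, text, "Done"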
 