Spaces:
Sleeping
Sleeping
capradeepgujaran
committed on
Commit
•
503a035
1
Parent(s):
5fad48b
Update app.py
Browse files
app.py
CHANGED
@@ -25,31 +25,157 @@ sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
25 |
|
26 |
# Define available languages for TTS
|
27 |
AVAILABLE_LANGUAGES = [
|
28 |
-
|
29 |
-
("
|
30 |
-
|
31 |
-
("mr", "Marathi"),
|
32 |
-
("kn", "Kannada"),
|
33 |
-
("tl", "Filipino (Tagalog)"),
|
34 |
-
("fr", "French"),
|
35 |
-
("gu", "Gujarati"),
|
36 |
-
("hi", "Hindi"),
|
37 |
-
("ml", "Malayalam"),
|
38 |
-
("ta", "Tamil"),
|
39 |
-
("te", "Telugu"),
|
40 |
-
("ur", "Urdu"),
|
41 |
-
("si", "Sinhala")
|
42 |
]
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
# Get available languages for OCR
|
45 |
try:
|
46 |
langs = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
47 |
except:
|
48 |
langs = ['eng'] # Fallback to English if tesseract isn't properly configured
|
49 |
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
def create_gradio_interface():
|
|
|
53 |
with gr.Blocks(title="Document Processing and TTS App") as demo:
|
54 |
gr.Markdown("# π Document Processing, Text & Audio Generation App")
|
55 |
|
@@ -89,10 +215,9 @@ def create_gradio_interface():
|
|
89 |
label="Voice Speed"
|
90 |
)
|
91 |
language = gr.Dropdown(
|
92 |
-
choices=
|
93 |
label="Language for Audio and Script",
|
94 |
-
value="
|
95 |
-
type="value"
|
96 |
)
|
97 |
output_option = gr.Radio(
|
98 |
choices=["audio", "script_text", "both"],
|
@@ -122,9 +247,15 @@ def create_gradio_interface():
|
|
122 |
inputs=[answer_output],
|
123 |
outputs=[text_input]
|
124 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
generate_button.click(
|
127 |
-
fn=
|
128 |
inputs=[
|
129 |
api_key_input, text_input, model_dropdown, voice_type,
|
130 |
voice_speed, language, output_option
|
@@ -136,6 +267,4 @@ def create_gradio_interface():
|
|
136 |
|
137 |
if __name__ == "__main__":
|
138 |
demo = create_gradio_interface()
|
139 |
-
demo.launch()
|
140 |
-
else:
|
141 |
-
demo = create_gradio_interface()
|
|
|
25 |
|
26 |
# Languages offered for TTS: display name -> language code.
# The mapping is the single place where a language is declared; the list of
# display names shown in the UI is derived from it (dicts keep insertion order).
LANGUAGE_CODES = dict(
    [
        ("English", "en"),
        ("Arabic", "ar"),
        ("German", "de"),
        ("Marathi", "mr"),
        ("Kannada", "kn"),
        ("Filipino (Tagalog)", "tl"),
        ("French", "fr"),
        ("Gujarati", "gu"),
        ("Hindi", "hi"),
        ("Malayalam", "ml"),
        ("Tamil", "ta"),
        ("Telugu", "te"),
        ("Urdu", "ur"),
        ("Sinhala", "si"),
    ]
)

# Display names, in the same order as declared above.
AVAILABLE_LANGUAGES = list(LANGUAGE_CODES)
|
40 |
+
|
41 |
# Get available languages for OCR.
# `tesseract --list-langs` prints a header line followed by one language per
# line. os.popen does not raise when the command is missing or fails - it just
# yields empty output - so the fallback has to be triggered by an empty result,
# not only by an exception.
try:
    with os.popen('tesseract --list-langs') as _tesseract_output:
        _listing = _tesseract_output.read()
    # Skip the header line; ignore blank lines instead of blindly dropping the
    # last entry, which would lose a language when there is no trailing newline.
    langs = [line.strip() for line in _listing.splitlines()[1:] if line.strip()]
except Exception:
    langs = []

if not langs:
    langs = ['eng']  # Fallback to English if tesseract isn't properly configured
|
46 |
|
47 |
+
def create_temp_dir():
    """Return the path of the ``temp`` directory under the current working directory.

    The directory is created if it does not exist yet. ``exist_ok=True`` is
    used instead of a separate existence check so that two callers running at
    the same time cannot race between the check and the creation.

    Returns:
        str: Path of the ``temp`` directory.

    Raises:
        OSError: If the directory cannot be created (for example because a
            regular file named ``temp`` is in the way).
    """
    temp_dir = os.path.join(os.getcwd(), 'temp')
    os.makedirs(temp_dir, exist_ok=True)
    return temp_dir
|
53 |
+
|
54 |
+
def preprocess_image(image_path):
    """Preprocess an image for OCR and return the path of the processed copy.

    The image is converted to grayscale, histogram-equalized, blurred with a
    5x5 Gaussian kernel and binarized with an adaptive Gaussian threshold
    (block size 11, constant 2). The result is written as a PNG into the
    directory returned by ``create_temp_dir()``.

    Args:
        image_path: Path of the image file to preprocess.

    Returns:
        str: Path of the processed PNG. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the image cannot be read or the processed image cannot
            be written.
    """
    # Local import keeps this block self-contained.
    import tempfile

    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    processed_image = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # A unique file name per call: a fixed name would let two requests that
    # are processed at the same time overwrite or delete each other's image.
    fd, temp_filename = tempfile.mkstemp(
        prefix="processed_image_", suffix=".png", dir=create_temp_dir()
    )
    os.close(fd)

    if not cv2.imwrite(temp_filename, processed_image):
        os.remove(temp_filename)
        raise ValueError(f"Could not write processed image for: {image_path}")
    return temp_filename
|
66 |
+
|
67 |
+
def extract_text_from_image(image_path, lang='eng'):
    """Extract text from an image using OCR.

    The image is first run through ``preprocess_image``; the preprocessed
    copy is then passed to Tesseract and deleted afterwards.

    Args:
        image_path: Path of the image file.
        lang: Tesseract language code(s) to use for recognition.

    Returns:
        str: The text recognized in the image.
    """
    processed_image_path = preprocess_image(image_path)
    try:
        # The context manager closes the file handle before the file is
        # removed; an open handle leaks and can block deletion on Windows.
        with Image.open(processed_image_path) as processed_image:
            return pytesseract.image_to_string(processed_image, lang=lang)
    finally:
        # Best-effort cleanup that also runs when OCR raises.
        try:
            os.remove(processed_image_path)
        except OSError:
            pass
|
76 |
+
|
77 |
+
def extract_text_from_pdf(pdf_path, lang='eng'):
    """Extract text from a PDF file, falling back to OCR for pages without text.

    Each page is first read through PyPDF2. When a page yields no text, it is
    rendered to an image, run through ``extract_text_from_image`` and followed
    by an ``[OCR applied on page N]`` marker.

    Args:
        pdf_path: Path of the PDF file.
        lang: Tesseract language code(s) used for the OCR fallback.

    Returns:
        str: The extracted text, or a string starting with
        ``"Error processing PDF:"`` when anything goes wrong (errors are
        reported through the return value, not raised).
    """
    parts = []
    temp_dir = create_temp_dir()
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page_num, page in enumerate(pdf_reader.pages):
                # Guard against extract_text() returning None so one page
                # cannot abort the whole document.
                page_text = page.extract_text() or ""
                if page_text.strip():
                    parts.append(page_text)
                    continue

                # No text layer on this page: render it and run OCR.
                images = convert_from_path(
                    pdf_path, first_page=page_num + 1, last_page=page_num + 1
                )
                for image in images:
                    temp_image_path = os.path.join(temp_dir, f'temp_image_{page_num}.png')
                    try:
                        image.save(temp_image_path, 'PNG')
                        parts.append(extract_text_from_image(temp_image_path, lang=lang))
                    finally:
                        # Remove the page image even when OCR fails.
                        try:
                            os.remove(temp_image_path)
                        except OSError:
                            pass
                    parts.append(f"\n[OCR applied on page {page_num + 1}]\n")
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
    return "".join(parts)
|
103 |
+
|
104 |
+
def extract_text(file_path, lang='eng'):
    """Extract text from an uploaded file, dispatching on its extension.

    PDFs go to ``extract_text_from_pdf``; PNG/JPG/JPEG images go to
    ``extract_text_from_image``. The extension check is case-insensitive.

    Args:
        file_path: Path of the uploaded file.
        lang: Tesseract language code(s) passed on to the extractor.

    Returns:
        str: The extracted text, or a string starting with
        ``"Unsupported file type:"`` for any other extension.
    """
    # Take the extension from the file name only. Splitting the whole path on
    # '.' reports the entire path (or "dir/file") as the extension when the
    # file has none, e.g. "/tmp/my.dir/README".
    _, dot, suffix = os.path.basename(file_path).rpartition('.')
    file_ext = suffix.lower() if dot else ''

    if file_ext == 'pdf':
        return extract_text_from_pdf(file_path, lang)
    if file_ext in ('png', 'jpg', 'jpeg'):
        return extract_text_from_image(file_path, lang)
    return f"Unsupported file type: {file_ext or '(no extension)'}"
|
113 |
+
|
114 |
+
def process_upload(api_key, files, lang):
    """Extract text from the uploaded files and build the global vector index.

    Args:
        api_key: OpenAI API key used for the embedding model.
        files: Paths of the uploaded files.
        lang: Tesseract language code(s) used when OCR is needed.

    Returns:
        str: A human-readable status message: success (optionally followed by
        per-file errors), or the reason nothing was indexed.

    Side effects:
        Rebinds the module-level ``vector_index`` when indexing succeeds.
    """
    global vector_index

    if not api_key:
        return "Please provide a valid OpenAI API Key."

    if not files:
        return "No files uploaded."

    # extract_text and its helpers report failures by returning a message
    # instead of raising. Those messages are non-empty, so without this check
    # they would be embedded and counted as successfully indexed documents.
    failure_prefixes = ("Error processing PDF:", "Unsupported file type:")

    documents = []
    error_messages = []

    for file_path in files:
        try:
            text = extract_text(file_path, lang)
            if text.startswith(failure_prefixes):
                error_messages.append(f"{os.path.basename(file_path)}: {text}")
            elif text.strip():  # Only add non-empty documents
                documents.append(Document(text=text))
            else:
                error_messages.append(f"No text extracted from {os.path.basename(file_path)}")
        except Exception as e:
            error_message = f"Error processing file {os.path.basename(file_path)}: {str(e)}"
            logging.error(error_message)
            error_messages.append(error_message)

    if not documents:
        return f"No valid documents were indexed. Errors: {'; '.join(error_messages)}"

    try:
        embed_model = OpenAIEmbedding(model="text-embedding-3-large", api_key=api_key)
        vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
    except Exception as e:
        return f"Error creating index: {str(e)}"

    success_message = f"Successfully indexed {len(documents)} files."
    if error_messages:
        success_message += f"\nErrors: {'; '.join(error_messages)}"
    return success_message
|
154 |
+
|
155 |
+
def query_app(query, model_name, use_similarity_check, api_key):
    """Answer a query against the indexed documents.

    Args:
        query: The question to run against the index.
        model_name: Name of the OpenAI model used to synthesize the answer.
        use_similarity_check: Accepted for interface compatibility; not read
            by this function.
        api_key: OpenAI API key.

    Returns:
        str: The answer text, or a message explaining why no answer could be
        produced (no index yet, missing key, or an error during querying).
    """
    global vector_index, query_log

    # Guard clauses: an index must exist before the API key is even considered.
    if vector_index is None:
        return "No documents indexed yet. Please upload documents first."
    if not api_key:
        return "Please provide a valid OpenAI API Key."

    try:
        llm = OpenAI(model=model_name, api_key=api_key)
        engine = vector_index.as_query_engine(
            llm=llm,
            response_synthesizer=get_response_synthesizer(llm=llm),
        )
        return engine.query(query).response
    except Exception as e:
        failure = f"Error during query processing: {e}"
        logging.error(failure)
        return failure
|
176 |
|
177 |
def create_gradio_interface():
|
178 |
+
"""Create and configure the Gradio interface"""
|
179 |
with gr.Blocks(title="Document Processing and TTS App") as demo:
|
180 |
gr.Markdown("# π Document Processing, Text & Audio Generation App")
|
181 |
|
|
|
215 |
label="Voice Speed"
|
216 |
)
|
217 |
language = gr.Dropdown(
|
218 |
+
choices=AVAILABLE_LANGUAGES,
|
219 |
label="Language for Audio and Script",
|
220 |
+
value="English"
|
|
|
221 |
)
|
222 |
output_option = gr.Radio(
|
223 |
choices=["audio", "script_text", "both"],
|
|
|
247 |
inputs=[answer_output],
|
248 |
outputs=[text_input]
|
249 |
)
|
250 |
+
|
251 |
+
def process_generation(*args):
    """Translate the selected language name to its code and generate the output.

    The positional arguments arrive in the order of the button's ``inputs``
    list; the language selection is the sixth one (index 5). Everything else
    is forwarded untouched to ``generate_audio_and_text``.
    """
    forwarded = [*args]
    language_name = forwarded[5]
    # Convert language name to code
    forwarded[5] = LANGUAGE_CODES[language_name]
    return generate_audio_and_text(*forwarded)
|
256 |
|
257 |
generate_button.click(
|
258 |
+
fn=process_generation,
|
259 |
inputs=[
|
260 |
api_key_input, text_input, model_dropdown, voice_type,
|
261 |
voice_speed, language, output_option
|
|
|
267 |
|
268 |
# Script entry point: build the Gradio UI and start serving it.
# The interface is bound to the module-level name `demo` before launching.
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch()
|
|
|
|