avengers27 committed
Commit
dca1254
1 Parent(s): c01bd3c

Upload 3 files

Files changed (4)
  1. .gitattributes +1 -0
  2. app.py +334 -0
  3. output.mp3 +3 -0
  4. response.mp3 +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ output.mp3 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,334 @@
+ import streamlit as st
+ from streamlit_chat import message
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.memory import ConversationBufferMemory
+ from langchain_core.prompts import PromptTemplate
+ from langchain_community.vectorstores import FAISS
+ import pdfplumber
+ import docx2txt
+ from langchain_community.embeddings import OllamaEmbeddings
+ from langchain_groq import ChatGroq
+ from dotenv import load_dotenv
+ from easygoogletranslate import EasyGoogleTranslate
+ import os
+ import csv
+ import re
+ from io import StringIO
+ import speech_recognition as sr
+ from gtts import gTTS
+ import gc
+ import torch
+
+ # Hide any GPUs and limit torch to a single thread to keep resource usage low.
+ os.environ['CUDA_VISIBLE_DEVICES'] = ''
+ torch.set_num_threads(1)
+
+ load_dotenv()
+ groq_api_key = os.getenv('GROQ_API_KEY')
+
+ MAX_DOCUMENTS = 5
+
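+ # Seed st.session_state with every key the UI depends on, so Streamlit reruns
+ # never hit a missing-key error: chat history, generated/past messages,
+ # translations, per-message translation flags, and the chain + vector store.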
+ def initialize_session_state():
+     if 'history' not in st.session_state:
+         st.session_state['history'] = []
+     if 'generated' not in st.session_state:
+         st.session_state['generated'] = ["Hello! Ask me anything about 🤗"]
+     if 'past' not in st.session_state:
+         st.session_state['past'] = ["Hey! 👋"]
+     if 'translated' not in st.session_state:
+         st.session_state['translated'] = ["Hello! Ask me anything about 🤗"]
+     if 'translation_requested' not in st.session_state:
+         st.session_state['translation_requested'] = [False] * len(st.session_state['generated'])
+     if 'chain' not in st.session_state:
+         st.session_state['chain'] = None
+     if 'vector_store' not in st.session_state:
+         st.session_state['vector_store'] = None
+
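+ # Translate text with EasyGoogleTranslate (an unofficial Google Translate
+ # client), falling back to the original text if the request fails.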
+ def translate_text(text, target_language='en'):
+     translator = EasyGoogleTranslate(target_language=target_language)
+     try:
+         return translator.translate(text)
+     except Exception as e:
+         st.error(f"Translation error: {e}")
+         return text
+
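+ # Normalize text before it is sent to gTTS: strip markdown/list symbols,
+ # collapse whitespace, read decimals as "X point Y", and expand common
+ # symbols and abbreviations into speakable words.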
+ def clean_text_for_speech(text):
+     # Strip formatting symbols and normalize whitespace and punctuation
+     text = re.sub(r'[*_~#|•●■◆▪]', '', text)
+     text = re.sub(r'\n', ' ', text)
+     text = re.sub(r'\s+', ' ', text)
+     text = re.sub(r'([.!?])\s*', r'\1 ', text)
+     text = re.sub(r'[:;]', ' ', text)
+     text = re.sub(r'[-]', ' ', text)
+     text = re.sub(r'[(){}\[\]]', '', text)
+
+     # Read decimal numbers as "X point Y"
+     text = re.sub(r'(\d+)\.(\d+)', r'\1 point \2', text)
+
+     # Expand symbols and abbreviations into speakable words
+     replacements = {
+         '&': 'and', '%': 'percent', '$': 'dollars', '€': 'euros', '£': 'pounds',
+         '@': 'at', '#': 'hashtag', 'e.g.': 'for example', 'i.e.': 'that is',
+         'etc.': 'et cetera', 'vs.': 'versus', 'fig.': 'figure', 'approx.': 'approximately',
+     }
+     for key, value in replacements.items():
+         text = text.replace(key, value)
+
+     return text.strip()
+
+
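+ # Synthesize speech with gTTS and return the MP3 bytes for st.audio. gTTS only
+ # exposes normal/slow speech, so any speed below 1.0 maps to slow playback.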
+ def text_to_speech(text, language='en', speed=1.0):
+     cleaned_text = clean_text_for_speech(text)
+     tts = gTTS(text=cleaned_text, lang=language, slow=(speed < 1.0))
+     tts.save("output.mp3")
+     with open("output.mp3", "rb") as audio_file:
+         audio_bytes = audio_file.read()
+     return audio_bytes
+
+
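+ # Prompt for the document-analysis persona. It is attached to the chain's
+ # "stuff" question-answering step (see create_conversational_chain), so its
+ # input variables are the retrieved {context} and the user's {question}.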
+ ANALYSIS_PROMPT = PromptTemplate.from_template("""
+ You are an expert analyst with deep knowledge across various fields. Your task is to provide an in-depth, comprehensive analysis of the uploaded documents. Approach each question with critical thinking and attention to detail.
+ You are only allowed to answer questions directly related to the content of the uploaded documents.
+ If a question is outside the scope of the documents, respond with: 'I'm sorry, I can only answer questions about the uploaded documents.'
+
+ Guidelines for Analysis:
+
+ 1. Document Overview:
+    - Identify the type of document(s) (research paper, report, data set, etc.)
+    - Summarize the main topic and purpose of each document
+
+ 2. Content Analysis:
+    - For research papers: Analyze the abstract, introduction, methodology, results, discussion, and conclusion
+    - For reports: Examine executive summary, key findings, and recommendations
+    - For data sets: Describe the structure, variables, and any apparent trends
+
+ 3. Key Points and Findings:
+    - Highlight the most significant information and insights from each document
+    - Identify any unique or surprising elements in the content
+
+ 4. Contextual Analysis:
+    - Place the information in a broader context within its field
+    - Discuss how this information relates to current trends or issues
+
+ 5. Critical Evaluation:
+    - Assess the strengths and limitations of the presented information
+    - Identify any potential biases or gaps in the data or arguments
+
+ 6. Implications and Applications:
+    - Discuss the potential impact of the findings or information
+    - Suggest possible applications or areas for further research
+
+ 7. Comparative Analysis (if multiple documents):
+    - Compare and contrast information across different documents
+    - Identify any conflicting data or viewpoints
+
+ 8. Data Interpretation:
+    - For numerical data: Provide clear explanations of statistics or trends
+    - For qualitative information: Offer interpretations of key quotes or concepts
+
+ 9. Sourcing and Credibility:
+    - Comment on the credibility of the sources (if apparent)
+    - Note any references to other important works in the field
+
+ 10. Comprehensive Response:
+     - Ensure all aspects of the question are addressed
+     - Provide a balanced view, considering multiple perspectives if applicable
+
+ Remember to maintain an objective, analytical tone. Your goal is to provide the most thorough and insightful analysis possible based on the available documents.
+
+ Context from the documents: {context}
+
+ Question: {question}
+ """)
+
+ def conversation_chat(query, chain, history):
+     # The chain reads prior turns from its ConversationBufferMemory; chat_history
+     # is passed along as well so the call still works if memory is removed.
+     result = chain.invoke({"question": query, "chat_history": history})
+     answer = result.get("answer", "I'm sorry, I couldn't generate an answer.")
+     history.append((query, answer))
+     return answer
+
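+ # Render the conversation, with per-message controls to translate a reply
+ # (Hindi or Kannada) or play it back as speech, followed by the input form.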
+ def display_chat_history(chain):
+     st.write("Chat History:")
+
+     for i in range(len(st.session_state['past'])):
+         message(st.session_state['past'][i], is_user=True, key=f'{i}_user', avatar_style="avataaars", seed="Aneka")
+         message(st.session_state['generated'][i], key=f'{i}_bot', avatar_style="bottts", seed="Aneka")
+
+         col1, col2, col3 = st.columns([2, 1, 1])
+         with col1:
+             dest_language = st.selectbox('Select language for translation:',
+                                          options=['hi', 'kn'],
+                                          index=0,
+                                          key=f'{i}_lang_select')
+         with col2:
+             if st.button(f'Translate Message {i}', key=f'{i}_translate'):
+                 translated_text = translate_text(st.session_state['generated'][i], target_language=dest_language)
+                 st.session_state['translated'][i] = translated_text
+                 st.session_state['translation_requested'][i] = True
+                 st.rerun()
+         with col3:
+             if st.button(f'Play Message {i}', key=f'{i}_play'):
+                 audio_bytes = text_to_speech(st.session_state['generated'][i])
+                 st.audio(audio_bytes, format="audio/mp3")
+
+         if st.session_state['translation_requested'][i]:
+             message(st.session_state['translated'][i], key=f'{i}_bot_translated', avatar_style="bottts", seed="Aneka")
+             if st.button(f'Play Translated Message {i}', key=f'{i}_play_translated'):
+                 audio_bytes = text_to_speech(st.session_state['translated'][i], dest_language)
+                 st.audio(audio_bytes, format="audio/mp3")
+
+     with st.form(key='user_input_form'):
+         user_input = st.text_input("Ask questions about your uploaded documents:", key="user_input")
+         submit_button = st.form_submit_button(label='Send')
+
+     if submit_button and user_input:
+         output = conversation_chat(user_input, chain, st.session_state['history'])
+         st.session_state['past'].append(user_input)
+         st.session_state['generated'].append(output)
+         st.session_state['translated'].append(output)
+         st.session_state['translation_requested'].append(False)
+         st.rerun()
+
+
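+ # Dispatch an uploaded file to the right extractor based on its MIME type.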
+ def process_file(file):
+     if file.type == "application/pdf":
+         return process_pdf(file)
+     elif file.type == "text/plain":
+         return file.getvalue().decode("utf-8")
+     elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+         return docx2txt.process(file)
+     elif file.type == "text/csv":
+         return process_csv(file)
+     else:
+         st.error(f"Unsupported file type: {file.type}")
+         return ""
+
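+ # Flatten a CSV into plain text: headers first, then one "Row N: ..." line per
+ # record, so the splitter and embedder can treat it like any other document.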
+ def process_csv(file):
+     text = ""
+     try:
+         file_content = file.getvalue().decode('utf-8')
+         csvfile = StringIO(file_content)
+         reader = csv.reader(csvfile)
+         headers = next(reader, None)
+         if headers:
+             text += f"CSV Headers: {', '.join(headers)}\n\n"
+         row_count = 0  # defined up front so an empty CSV still reports a total
+         for row_count, row in enumerate(reader, 1):
+             text += f"Row {row_count}: {' | '.join(row)}\n"
+         text += f"\nTotal rows: {row_count}\n"
+     except Exception as e:
+         st.error(f"Error reading CSV file: {e}")
+     return text
+
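+ # Extract text page by page with pdfplumber, then try to regroup it by
+ # standard paper sections; fall back to the raw page text if none are found.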
+ def process_pdf(file):
+     text = ""
+     with pdfplumber.open(file) as pdf:
+         for page_num, page in enumerate(pdf.pages, 1):
+             page_text = page.extract_text()
+             if page_text:
+                 text += f"[Page {page_num}]\n{page_text}\n\n"
+
+     # Case-insensitivity is applied via the flag; an inline (?i) mid-pattern
+     # raises an error on Python 3.11+.
+     section_names = r'(abstract|introduction|methodology|results|discussion|conclusion)'
+     sections = re.findall(section_names + r'.*?\n(.*?)(?=\n' + section_names + r'|$)',
+                           text, re.DOTALL | re.IGNORECASE)
+     structured_text = "\n\n".join([f"{section[0].capitalize()}:\n{section[1]}" for section in sections])
+
+     return structured_text if structured_text else text
+
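+ # Capture one utterance from the microphone and transcribe it with Google's
+ # free speech-recognition endpoint (sr.Microphone requires PyAudio on the host).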
+ def recognize_speech():
+     recognizer = sr.Recognizer()
+     with sr.Microphone() as source:
+         st.write("Listening... Please speak now.")
+         try:
+             st.info("Listening for up to 10 seconds...")
+             recognizer.adjust_for_ambient_noise(source, duration=1)
+             audio = recognizer.listen(source, timeout=10, phrase_time_limit=5)
+             st.success("Audio captured. Processing...")
+         except sr.WaitTimeoutError:
+             st.warning("No speech detected. Please try again.")
+             return ""
+
+     try:
+         text = recognizer.recognize_google(audio)
+         st.success(f"You said: {text}")
+         return text
+     except sr.UnknownValueError:
+         st.error("Sorry, I couldn't understand that.")
+         return ""
+     except sr.RequestError as e:
+         st.error(f"Could not request results; {e}")
+         return ""
+
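+ # Build the retrieval chain: Groq-hosted Llama 3 70B as the LLM, the FAISS
+ # store as retriever (top 5 chunks), buffer memory for chat history, and the
+ # analysis prompt wired into the "stuff" QA step via combine_docs_chain_kwargs.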
+ def create_conversational_chain(vector_store):
+     llm = ChatGroq(groq_api_key=groq_api_key, model_name='llama3-70b-8192')
+     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         chain_type='stuff',
+         retriever=vector_store.as_retriever(search_kwargs={"k": 5}),
+         memory=memory,
+         combine_docs_chain_kwargs={"prompt": ANALYSIS_PROMPT}
+     )
+     return chain
+
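+ # App entry point: page setup, document upload and chunking, embedding into
+ # FAISS (requires a local Ollama server with the nomic-embed-text model
+ # pulled), then the chat UI and voice input.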
+ def main():
+     # st.set_page_config must be the first Streamlit command on each run
+     st.set_page_config(page_title="DOCS Chatbot & Translator", layout="wide")
+     initialize_session_state()
+     st.title("Smart Document Tool 🤓")
+
+     st.sidebar.header("About App:")
+     st.sidebar.write("This app utilizes Streamlit")
+     st.sidebar.caption("Your AI Assistant")
+
+     uploaded_files = st.file_uploader("Upload your Docs", type=["pdf", "txt", "docx", "csv"], accept_multiple_files=True)
+
+     if uploaded_files:
+         all_text = ""
+         for uploaded_file in uploaded_files[:MAX_DOCUMENTS]:
+             try:
+                 all_text += f"File: {uploaded_file.name}\n\n{process_file(uploaded_file)}\n\n"
+             except Exception as e:
+                 st.error(f"Error processing file {uploaded_file.name}: {e}")
+             finally:
+                 gc.collect()
+
+         if len(uploaded_files) > MAX_DOCUMENTS:
+             st.warning(f"Only the first {MAX_DOCUMENTS} documents were processed due to memory constraints.")
+
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=4000,
+             chunk_overlap=300,
+             length_function=len,
+             separators=["\n\n", "\n", " ", ""]
+         )
+         text_chunks = text_splitter.split_text(all_text)
+
+         embeddings = OllamaEmbeddings(model="nomic-embed-text")
+
+         with st.spinner('Analyzing Document...'):
+             vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+             st.session_state['vector_store'] = vector_store
+             st.session_state['chain'] = create_conversational_chain(vector_store)
+         display_chat_history(st.session_state['chain'])
+
+         if st.button('Speak Now'):
+             recognized_text = recognize_speech()
+             if recognized_text:
+                 st.session_state['past'].append(recognized_text)
+                 output = conversation_chat(recognized_text, st.session_state['chain'], st.session_state['history'])
+                 st.session_state['generated'].append(output)
+                 st.session_state['translated'].append(output)
+                 st.session_state['translation_requested'].append(False)
+
+                 audio_bytes = text_to_speech(output)
+                 st.audio(audio_bytes, format="audio/mp3")
+
+                 st.rerun()
+             else:
+                 st.warning("No speech input was processed. Please try speaking again.")
+
+     gc.collect()
+
+
+ if __name__ == "__main__":
+     main()
output.mp3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2ca905282894e4d2c7b9726cec1e8c44276868dfd3618035b54fe157c0b93684
+ size 1305024
response.mp3 ADDED
Binary file (478 kB).