KarthickAdopleAI committed on
Commit
3ae161b
1 Parent(s): 95d1f8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -15,8 +15,9 @@ import requests
15
  import logging
16
  import os
17
  from pydub import AudioSegment
18
- from pydub.silence import split_on_silence
19
  import speech_recognition as sr
 
 
20
  nltk.download('punkt')
21
  nltk.download('stopwords')
22
 
@@ -43,6 +44,7 @@ class VideoAnalytics:
43
 
44
  self.r = sr.Recognizer()
45
 
 
46
  # Initialize english text variable
47
  self.english_text = ""
48
 
@@ -84,12 +86,12 @@ class VideoAnalytics:
84
  raise e
85
 
86
  # Function to recognize speech in the audio file
87
- def transcribe_audio(self,path):
88
  """Transcribe speech from an audio file."""
89
  try:
90
  with sr.AudioFile(path) as source:
91
  audio_listened = self.r.record(source)
92
- text = self.r.recognize_google(audio_listened)
93
  return text
94
  except sr.UnknownValueError as e:
95
  logging.error(f"Speech recognition could not understand audio: {e}")
@@ -99,7 +101,7 @@ class VideoAnalytics:
99
  return ""
100
 
101
  # Function to split the audio file into chunks on silence and apply speech recognition
102
- def get_large_audio_transcription_on_silence(self,path):
103
  """Split the large audio file into chunks and apply speech recognition on each chunk."""
104
  try:
105
  sound = AudioSegment.from_file(path)
@@ -115,7 +117,7 @@ class VideoAnalytics:
115
  chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
116
  audio_chunk.export(chunk_filename, format="wav")
117
 
118
- text = self.transcribe_audio(chunk_filename)
119
 
120
  if text:
121
  text = f"{text.capitalize()}. "
@@ -148,8 +150,11 @@ class VideoAnalytics:
148
 
149
  # Replace 'input.mp3' and 'output.wav' with your file paths
150
  audio_filename = self.mp3_to_wav("output_audio.mp3", 'output.wav')
151
-
152
- text = self.get_large_audio_transcription_on_silence(audio_filename)
 
 
 
153
  # Update the transcribed_text attribute with the transcription result
154
  self.transcribed_text = text
155
  # Update the translation text into english_text
 
15
  import logging
16
  import os
17
  from pydub import AudioSegment
 
18
  import speech_recognition as sr
19
+ import torchaudio
20
+ from speechbrain.inference.classifiers import EncoderClassifier
21
  nltk.download('punkt')
22
  nltk.download('stopwords')
23
 
 
44
 
45
  self.r = sr.Recognizer()
46
 
47
+ self.language_id = EncoderClassifier.from_hparams(source="speechbrain/lang-id-voxlingua107-ecapa", savedir="tmp")
48
  # Initialize english text variable
49
  self.english_text = ""
50
 
 
86
  raise e
87
 
88
  # Function to recognize speech in the audio file
89
+ def transcribe_audio(self,path: str,lang: str):
90
  """Transcribe speech from an audio file."""
91
  try:
92
  with sr.AudioFile(path) as source:
93
  audio_listened = self.r.record(source)
94
+ text = self.r.recognize_google(audio_listened,language=lang)
95
  return text
96
  except sr.UnknownValueError as e:
97
  logging.error(f"Speech recognition could not understand audio: {e}")
 
101
  return ""
102
 
103
  # Function to split the audio file into chunks on silence and apply speech recognition
104
+ def get_large_audio_transcription_on_silence(self,path: str,lang: str):
105
  """Split the large audio file into chunks and apply speech recognition on each chunk."""
106
  try:
107
  sound = AudioSegment.from_file(path)
 
117
  chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
118
  audio_chunk.export(chunk_filename, format="wav")
119
 
120
+ text = self.transcribe_audio(chunk_filename,lang)
121
 
122
  if text:
123
  text = f"{text.capitalize()}. "
 
150
 
151
  # Replace 'input.mp3' and 'output.wav' with your file paths
152
  audio_filename = self.mp3_to_wav("output_audio.mp3", 'output.wav')
153
+ # Detect the spoken language so it can be passed to the speech recognizer
154
+ signal = self.language_id.load_audio("/content/output_.wav")
155
+ prediction = self.language_id.classify_batch(signal)
156
+ lang = [prediction[3][0].split(":")][0][0]
157
+ text = self.get_large_audio_transcription_on_silence(audio_filename,lang)
158
  # Update the transcribed_text attribute with the transcription result
159
  self.transcribed_text = text
160
  # Update the translation text into english_text