KarthickAdopleAI committed
Commit 28482bd
1 Parent(s): 4bfb1ad

Update app.py

Files changed (1)
  app.py  +110 -30
app.py CHANGED
@@ -14,6 +14,8 @@ from pytube import YouTube
 import requests
 import logging
 import os
+from pydub import AudioSegment, silence
+import speech_recognition as sr
 nltk.download('punkt')
 nltk.download('stopwords')
 
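The two imports added here pull in pydub and the SpeechRecognition package. As a quick aside (not part of the commit): both install via pip as pydub and SpeechRecognition, and pydub additionally needs an ffmpeg binary on the PATH to decode MP3 files. A minimal sanity check, assuming those packages are installed:

import speech_recognition as sr
from pydub import AudioSegment

print(sr.__version__)           # SpeechRecognition version
print(AudioSegment.converter)   # ffmpeg/avconv binary pydub will shell out to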
 
@@ -27,7 +29,6 @@ class VideoAnalytics:
     def __init__(self):
         """
         Initialize the VideoAnalytics object.
-
         Args:
             hf_token (str): Hugging Face API token.
         """
@@ -39,16 +40,7 @@ class VideoAnalytics:
         # Initialize transcribed text variable
         self.transcribed_text = ""
 
-        # API URL for accessing the Hugging Face model
-        self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
-
-
-        hf_token = os.getenv('HF_TOKEN')
-        # Placeholder for Hugging Face API token
-        self.hf_token = hf_token  # Replace this with the actual Hugging Face API token
-
-        # Set headers for API requests with Hugging Face token
-        self.headers = {"Authorization": f"Bearer {self.hf_token}"}
+        self.r = sr.Recognizer()
 
         # Initialize english text variable
         self.english_text = ""
@@ -61,13 +53,86 @@ class VideoAnalytics:
         # Configure logging settings
         logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
+    def mp3_to_wav(self, mp3_file: str, wav_file: str) -> str:
+        """
+        Convert an MP3 audio file to WAV format.
+
+        Args:
+            mp3_file (str): The path to the input MP3 file.
+            wav_file (str): The path to save the output WAV file.
+
+        Returns:
+            str: The filename of the converted WAV file.
+
+        Raises:
+            Exception: If there's an error during the conversion process.
+        """
+        try:
+            # Load the MP3 file
+            audio = AudioSegment.from_mp3(mp3_file)
+
+            # Export the audio to WAV format
+            audio.export(wav_file, format="wav")
+
+            logging.info(f"MP3 file '{mp3_file}' converted to WAV successfully: {wav_file}")
+
+            return wav_file
+        except Exception as e:
+            # Log the exception and raise it further
+            logging.error(f"Error occurred while converting MP3 to WAV: {e}")
+            raise e
+
+    # Function to recognize speech in the audio file
+    def transcribe_audio(self, path):
+        """Transcribe speech from an audio file."""
+        try:
+            with sr.AudioFile(path) as source:
+                audio_listened = self.r.record(source)
+                text = self.r.recognize_google(audio_listened)
+            return text
+        except sr.UnknownValueError as e:
+            logging.error(f"Speech recognition could not understand audio: {e}")
+            return ""
+        except sr.RequestError as e:
+            logging.error(f"Could not request results from Google Speech Recognition service: {e}")
+            return ""
+
+    # Function to split the audio file into chunks on silence and apply speech recognition
+    def get_large_audio_transcription_on_silence(self, path):
+        """Split the large audio file into chunks and apply speech recognition on each chunk."""
+        try:
+            sound = AudioSegment.from_file(path)
+            chunks = silence.split_on_silence(sound, min_silence_len=500, silence_thresh=sound.dBFS-14, keep_silence=500)
+            folder_name = "audio-chunks"
+
+            if not os.path.isdir(folder_name):
+                os.mkdir(folder_name)
+
+            whole_text = ""
+
+            for i, audio_chunk in enumerate(chunks, start=1):
+                chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
+                audio_chunk.export(chunk_filename, format="wav")
+
+                text = self.transcribe_audio(chunk_filename)
+
+                if text:
+                    text = f"{text.capitalize()}. "
+                    logging.info(f"Transcribed {chunk_filename}: {text}")
+                    whole_text += text
+                else:
+                    logging.warning(f"No speech recognized in {chunk_filename}")
+
+            return whole_text
+        except Exception as e:
+            logging.error(f"Error processing audio: {e}")
+            return ""
+
     def transcribe_video(self, vid: str) -> str:
         """
         Transcribe the audio of the video.
-
         Args:
             vid (str): Path to the video file.
-
         Returns:
             str: Transcribed text.
         """
@@ -78,28 +143,23 @@ class VideoAnalytics:
 
             # Write audio to a temporary file
             audio.write_audiofile("output_audio.mp3")
-            audio_file = open("output_audio.mp3", "rb")
 
-            # Define a helper function to query the Hugging Face model
-            def query(data):
-                response = requests.post(self.API_URL, headers=self.headers, data=data)
-                return response.json()
 
-            # Send audio data to the Hugging Face model for transcription
-            output = query(audio_file)
+            # Convert the temporary MP3 output to WAV for speech recognition
+            audio_filename = self.mp3_to_wav("output_audio.mp3", 'output.wav')
 
-            print(output)
+            text = self.get_large_audio_transcription_on_silence(audio_filename)
             # Update the transcribed_text attribute with the transcription result
-            self.transcribed_text = output["text"]
+            self.transcribed_text = text
             # Update the translation text into english_text
             self.english_text = self.translation()
             # Return the transcribed text
-            return output["text"]
+            return text
 
         except Exception as e:
             logging.error(f"Error transcribing video: {e}")
             return ""
-
+
     def generate_video_summary(self) -> str:
         """
         Generate a summary of the transcribed video.
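Taken together, transcribe_video now goes moviepy audio dump, then MP3-to-WAV conversion, then chunked Google transcription, instead of posting the MP3 to the Whisper inference API. A rough usage sketch, assuming a VideoAnalytics instance can be constructed as in app.py (the sample.mp4 path is made up):

from moviepy.editor import VideoFileClip

analytics = VideoAnalytics()

clip = VideoFileClip("sample.mp4")              # hypothetical local video
clip.audio.write_audiofile("output_audio.mp3")  # same temporary file the method uses
clip.close()

wav_path = analytics.mp3_to_wav("output_audio.mp3", "output.wav")
text = analytics.get_large_audio_transcription_on_silence(wav_path)
print(text)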
@@ -365,10 +425,30 @@ class VideoAnalytics:
             # Log any errors that occur during initialization of YouTube object
             logging.error(f"Error downloading video: {e}")
             return ""
-    def save_audio_with_gtts(self,text, filename):
-        tts = gTTS(text=text, lang='en')
-        tts.save(filename)
-        return filename
+
+    def save_audio_with_gtts(self, text: str, filename: str) -> str:
+        """
+        Generate an audio file from the given text using gTTS and save it.
+
+        Args:
+            text (str): The text to be converted into speech.
+            filename (str): The filename (including path) to save the audio file.
+
+        Returns:
+            str: The filename of the saved audio file.
+
+        Raises:
+            Exception: If there's an error during the conversion or saving process.
+        """
+        try:
+            tts = gTTS(text=text, lang='en')
+            tts.save(filename)
+            logging.info(f"Audio file saved successfully: {filename}")
+            return filename
+        except Exception as e:
+            # Log the exception and raise it further
+            logging.error(f"Error occurred while saving audio: {e}")
+            raise e
 
     def main(self, video: str = None, input_path: str = None) -> tuple:
         """
@@ -386,7 +466,7 @@ class VideoAnalytics:
             video_ = VideoFileClip(input_path)
             duration = video_.duration
             video_.close()
-            if round(duration) <= 600:
+            if round(duration) <= 6*600:
                 text = self.transcribe_video(input_path)
             else:
                 return "Video Duration Above 10 Minutes,Try Below 10 Minutes Video","","",None,None,None
@@ -394,7 +474,7 @@ class VideoAnalytics:
             video_ = VideoFileClip(video)
             duration = video_.duration
             video_.close()
-            if round(duration) <= 600:
+            if round(duration) <= 6*600:
                 text = self.transcribe_video(video)
                 input_path = video
             else:
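Both duration gates now compare against 6*600 = 3600 seconds, so clips up to 60 minutes pass the check. A minimal sketch of that gate as a standalone helper (hypothetical function, not in app.py):

from moviepy.editor import VideoFileClip

MAX_DURATION_SECONDS = 6 * 600  # 3600 s = 60 minutes

def within_duration_limit(path: str) -> bool:
    clip = VideoFileClip(path)
    duration = clip.duration
    clip.close()
    return round(duration) <= MAX_DURATION_SECONDS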
 