Spaces:

ADOPLE
/

Video_Analytics

Running

App Files Files Community

KarthickAdopleAI committed on Mar 28

Commit

7473f11

•

1 Parent(s): 6f1058d

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -27

app.py CHANGED Viewed

@@ -27,7 +27,6 @@ class VideoAnalytics:
     def __init__(self):
       """
       Initialize the VideoAnalytics object.
       Args:
           hf_token (str): Hugging Face API token.
       """
@@ -39,16 +38,7 @@ class VideoAnalytics:
       # Initialize transcribed text variable
       self.transcribed_text = ""
-      # API URL for accessing the Hugging Face model
-      self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
-      hf_token = os.getenv('HF_TOKEN')
-      # Placeholder for Hugging Face API token
-      self.hf_token = hf_token # Replace this with the actual Hugging Face API token
-      # Set headers for API requests with Hugging Face token
-      self.headers = {"Authorization": f"Bearer {self.hf_token}"}
       # Initialize english text variable
       self.english_text = ""
@@ -64,10 +54,8 @@ class VideoAnalytics:
     def transcribe_video(self, vid: str) -> str:
       """
       Transcribe the audio of the video.
       Args:
           vid (str): Path to the video file.
       Returns:
           str: Transcribed text.
       """
@@ -79,22 +67,13 @@ class VideoAnalytics:
           # Write audio to a temporary file
           audio.write_audiofile("output_audio.mp3")
           audio_file = open("output_audio.mp3", "rb")
-          # Define a helper function to query the Hugging Face model
-          def query(data):
-              response = requests.post(self.API_URL, headers=self.headers, data=data)
-              return response.json()
-          # Send audio data to the Hugging Face model for transcription
-          output = query(audio_file)
-          print(output)
           # Update the transcribed_text attribute with the transcription result
-          self.transcribed_text = output["text"]
           # Update the translation text into english_text
           self.english_text = self.translation()
           # Return the transcribed text
-          return output["text"]
       except Exception as e:
           logging.error(f"Error transcribing video: {e}")
@@ -401,7 +380,7 @@ class VideoAnalytics:
             video_ = VideoFileClip(input_path)
             duration = video_.duration
             video_.close()
-            if round(duration) <= 600:
               text = self.transcribe_video(input_path)
             else:
               return "Video Duration Above 10 Minutes,Try Below 10 Minutes Video","",""
@@ -409,7 +388,7 @@ class VideoAnalytics:
             video_ = VideoFileClip(video)
             duration = video_.duration
             video_.close()
-            if round(duration) <= 600:
               text = self.transcribe_video(video)
               input_path = video
             else:

     def __init__(self):
       """
       Initialize the VideoAnalytics object.
       Args:
           hf_token (str): Hugging Face API token.
       """
       # Initialize transcribed text variable
       self.transcribed_text = ""
+      self.s2t_model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english")
       # Initialize english text variable
       self.english_text = ""
     def transcribe_video(self, vid: str) -> str:
       """
       Transcribe the audio of the video.
       Args:
           vid (str): Path to the video file.
       Returns:
           str: Transcribed text.
       """
           # Write audio to a temporary file
           audio.write_audiofile("output_audio.mp3")
           audio_file = open("output_audio.mp3", "rb")
+          transcriptions = self.s2t_model.transcribe(["output_audio.mp3"])
           # Update the transcribed_text attribute with the transcription result
+          self.transcribed_text = transcriptions[0]['transcription']
           # Update the translation text into english_text
           self.english_text = self.translation()
           # Return the transcribed text
+          return transcriptions[0]['transcription']
       except Exception as e:
           logging.error(f"Error transcribing video: {e}")
             video_ = VideoFileClip(input_path)
             duration = video_.duration
             video_.close()
+            if round(duration) <= 36000:
               text = self.transcribe_video(input_path)
             else:
               return "Video Duration Above 10 Minutes,Try Below 10 Minutes Video","",""
             video_ = VideoFileClip(video)
             duration = video_.duration
             video_.close()
+            if round(duration) <= 36000:
               text = self.transcribe_video(video)
               input_path = video
             else: