Spaces:

ADOPLE
/

Video_Analytics

Sleeping

App Files Files Community

KarthickAdopleAI committed on Mar 28

Commit

c1a388c

•

1 Parent(s): 2f4a315

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -5

app.py CHANGED Viewed

@@ -28,6 +28,7 @@ class VideoAnalytics:
     def __init__(self):
       """
       Initialize the VideoAnalytics object.
       Args:
           hf_token (str): Hugging Face API token.
       """
@@ -39,7 +40,16 @@ class VideoAnalytics:
       # Initialize transcribed text variable
       self.transcribed_text = ""
-      self.s2t_model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english")
       # Initialize english text variable
       self.english_text = ""
@@ -55,8 +65,10 @@ class VideoAnalytics:
     def transcribe_video(self, vid: str) -> str:
       """
       Transcribe the audio of the video.
       Args:
           vid (str): Path to the video file.
       Returns:
           str: Transcribed text.
       """
@@ -68,18 +80,27 @@ class VideoAnalytics:
           # Write audio to a temporary file
           audio.write_audiofile("output_audio.mp3")
           audio_file = open("output_audio.mp3", "rb")
-          transcriptions = self.s2t_model.transcribe(["output_audio.mp3"])
           # Update the transcribed_text attribute with the transcription result
-          self.transcribed_text = transcriptions[0]['transcription']
           # Update the translation text into english_text
           self.english_text = self.translation()
           # Return the transcribed text
-          return transcriptions[0]['transcription']
       except Exception as e:
           logging.error(f"Error transcribing video: {e}")
           return ""
     def generate_video_summary(self) -> str:
         """
         Generate a summary of the transcribed video.

     def __init__(self):
       """
       Initialize the VideoAnalytics object.
       Args:
           hf_token (str): Hugging Face API token.
       """
       # Initialize transcribed text variable
       self.transcribed_text = ""
+      # API URL for accessing the Hugging Face model
+      self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
+      hf_token = os.getenv('HF_TOKEN')
+      # Placeholder for Hugging Face API token
+      self.hf_token = hf_token # Replace this with the actual Hugging Face API token
+      # Set headers for API requests with Hugging Face token
+      self.headers = {"Authorization": f"Bearer {self.hf_token}"}
       # Initialize english text variable
       self.english_text = ""
     def transcribe_video(self, vid: str) -> str:
       """
       Transcribe the audio of the video.
       Args:
           vid (str): Path to the video file.
       Returns:
           str: Transcribed text.
       """
           # Write audio to a temporary file
           audio.write_audiofile("output_audio.mp3")
           audio_file = open("output_audio.mp3", "rb")
+          # Define a helper function to query the Hugging Face model
+          def query(data):
+              response = requests.post(self.API_URL, headers=self.headers, data=data)
+              return response.json()
+          # Send audio data to the Hugging Face model for transcription
+          output = query(audio_file)
+          print(output)
           # Update the transcribed_text attribute with the transcription result
+          self.transcribed_text = output["text"]
           # Update the translation text into english_text
           self.english_text = self.translation()
           # Return the transcribed text
+          return output["text"]
       except Exception as e:
           logging.error(f"Error transcribing video: {e}")
           return ""
     def generate_video_summary(self) -> str:
         """
         Generate a summary of the transcribed video.