sts

Running on A100

App Files Files Community

Edmond7 committed about 6 hours ago

Commit

0960663

•

1 Parent(s): 95eaa4c

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -34

app.py CHANGED Viewed

@@ -20,6 +20,7 @@ import boto3
 from botocore.exceptions import NoCredentialsError
 import time
 import tempfile
 # Import functions from other modules
 from asr import transcribe, ASR_LANGUAGES, ASR_SAMPLING_RATE
@@ -81,49 +82,49 @@ def extract_audio_from_file(input_bytes):
         temp_file_path = temp_file.name
     try:
-        # First, try to read as a standard audio file
-        audio_array, sample_rate = sf.read(temp_file_path)
-        return audio_array, sample_rate
-    except Exception:
         try:
-            # Try to read as a video file
             video = VideoFileClip(temp_file_path)
             audio = video.audio
             if audio is not None:
-                # Extract audio from video
                 audio_array = audio.to_soundarray()
                 sample_rate = audio.fps
-                # Convert to mono if stereo
-                if len(audio_array.shape) > 1 and audio_array.shape[1] > 1:
-                    audio_array = audio_array.mean(axis=1)
-                # Ensure audio is float32 and normalized
                 audio_array = audio_array.astype(np.float32)
                 audio_array /= np.max(np.abs(audio_array))
                 video.close()
                 return audio_array, sample_rate
             else:
-                raise ValueError("Video file contains no audio")
-        except Exception:
-            # If video reading fails, try as generic audio with pydub
-            try:
-                audio = AudioSegment.from_file(temp_file_path)
-                audio_array = np.array(audio.get_array_of_samples())
-                # Convert to float32 and normalize
-                audio_array = audio_array.astype(np.float32) / (2**15 if audio.sample_width == 2 else 2**7)
-                # Convert stereo to mono if necessary
-                if audio.channels == 2:
-                    audio_array = audio_array.reshape((-1, 2)).mean(axis=1)
-                return audio_array, audio.frame_rate
-            except Exception as e:
-                raise ValueError(f"Unsupported file format: {str(e)}")
     finally:
-        # Clean up the temporary file
         os.unlink(temp_file_path)
 @app.post("/transcribe")
@@ -342,7 +343,8 @@ async def identify_language_file(
         processing_time = time.time() - start_time
         return JSONResponse(
             status_code=500,
-            content={"message": "An error occurred during language identification", "details": error_details, "processing_time_seconds": processing_time})
 @app.post("/asr_languages")
 async def get_asr_languages(request: LanguageRequest, api_key: APIKey = Depends(get_api_key)):
@@ -392,12 +394,10 @@ async def get_tts_languages(request: LanguageRequest, api_key: APIKey = Depends(
             content={"message": "An error occurred while fetching TTS languages", "details": error_details, "processing_time_seconds": processing_time}
         )
-# If you want to add a health check endpoint
 @app.get("/health")
 async def health_check():
     return {"status": "ok"}
-# You might also want to add a root endpoint that provides basic API information
 @app.get("/")
 async def root():
     return {

 from botocore.exceptions import NoCredentialsError
 import time
 import tempfile
+import magic
 # Import functions from other modules
 from asr import transcribe, ASR_LANGUAGES, ASR_SAMPLING_RATE
         temp_file_path = temp_file.name
     try:
+        # Log file info
+        file_info = magic.from_file(temp_file_path, mime=True)
+        logger.info(f"Received file of type: {file_info}")
+        # Try reading with soundfile first
+        try:
+            audio_array, sample_rate = sf.read(temp_file_path)
+            logger.info(f"Successfully read audio with soundfile. Shape: {audio_array.shape}, Sample rate: {sample_rate}")
+            return audio_array, sample_rate
+        except Exception as e:
+            logger.info(f"Could not read with soundfile: {str(e)}")
+        # Try reading as video
         try:
             video = VideoFileClip(temp_file_path)
             audio = video.audio
             if audio is not None:
                 audio_array = audio.to_soundarray()
                 sample_rate = audio.fps
+                audio_array = audio_array.mean(axis=1) if len(audio_array.shape) > 1 and audio_array.shape[1] > 1 else audio_array
                 audio_array = audio_array.astype(np.float32)
                 audio_array /= np.max(np.abs(audio_array))
                 video.close()
+                logger.info(f"Successfully extracted audio from video. Shape: {audio_array.shape}, Sample rate: {sample_rate}")
                 return audio_array, sample_rate
             else:
+                logger.info("Video file contains no audio")
+        except Exception as e:
+            logger.info(f"Could not read as video: {str(e)}")
+        # Try reading with pydub
+        try:
+            audio = AudioSegment.from_file(temp_file_path)
+            audio_array = np.array(audio.get_array_of_samples())
+            audio_array = audio_array.astype(np.float32) / (2**15 if audio.sample_width == 2 else 2**7)
+            audio_array = audio_array.reshape((-1, 2)).mean(axis=1) if audio.channels == 2 else audio_array
+            logger.info(f"Successfully read audio with pydub. Shape: {audio_array.shape}, Sample rate: {audio.frame_rate}")
+            return audio_array, audio.frame_rate
+        except Exception as e:
+            logger.info(f"Could not read with pydub: {str(e)}")
+        raise ValueError(f"Unsupported file format: {file_info}")
     finally:
         os.unlink(temp_file_path)
 @app.post("/transcribe")
         processing_time = time.time() - start_time
         return JSONResponse(
             status_code=500,
+            content={"message": "An error occurred during language identification", "details": error_details, "processing_time_seconds": processing_time}
+        )
 @app.post("/asr_languages")
 async def get_asr_languages(request: LanguageRequest, api_key: APIKey = Depends(get_api_key)):
             content={"message": "An error occurred while fetching TTS languages", "details": error_details, "processing_time_seconds": processing_time}
         )
 @app.get("/health")
 async def health_check():
     return {"status": "ok"}
 @app.get("/")
 async def root():
     return {