Spaces:
Edmond98
/
Running on A100

Edmond7 committed on
Commit
0960663
1 Parent(s): 95eaa4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -34
app.py CHANGED
@@ -20,6 +20,7 @@ import boto3
20
  from botocore.exceptions import NoCredentialsError
21
  import time
22
  import tempfile
 
23
 
24
  # Import functions from other modules
25
  from asr import transcribe, ASR_LANGUAGES, ASR_SAMPLING_RATE
@@ -81,49 +82,49 @@ def extract_audio_from_file(input_bytes):
81
  temp_file_path = temp_file.name
82
 
83
  try:
84
- # First, try to read as a standard audio file
85
- audio_array, sample_rate = sf.read(temp_file_path)
86
- return audio_array, sample_rate
87
- except Exception:
 
 
 
 
 
 
 
 
 
88
  try:
89
- # Try to read as a video file
90
  video = VideoFileClip(temp_file_path)
91
  audio = video.audio
92
  if audio is not None:
93
- # Extract audio from video
94
  audio_array = audio.to_soundarray()
95
  sample_rate = audio.fps
96
-
97
- # Convert to mono if stereo
98
- if len(audio_array.shape) > 1 and audio_array.shape[1] > 1:
99
- audio_array = audio_array.mean(axis=1)
100
-
101
- # Ensure audio is float32 and normalized
102
  audio_array = audio_array.astype(np.float32)
103
  audio_array /= np.max(np.abs(audio_array))
104
-
105
  video.close()
 
106
  return audio_array, sample_rate
107
  else:
108
- raise ValueError("Video file contains no audio")
109
- except Exception:
110
- # If video reading fails, try as generic audio with pydub
111
- try:
112
- audio = AudioSegment.from_file(temp_file_path)
113
- audio_array = np.array(audio.get_array_of_samples())
114
-
115
- # Convert to float32 and normalize
116
- audio_array = audio_array.astype(np.float32) / (2**15 if audio.sample_width == 2 else 2**7)
117
-
118
- # Convert stereo to mono if necessary
119
- if audio.channels == 2:
120
- audio_array = audio_array.reshape((-1, 2)).mean(axis=1)
121
-
122
- return audio_array, audio.frame_rate
123
- except Exception as e:
124
- raise ValueError(f"Unsupported file format: {str(e)}")
125
  finally:
126
- # Clean up the temporary file
127
  os.unlink(temp_file_path)
128
 
129
  @app.post("/transcribe")
@@ -342,7 +343,8 @@ async def identify_language_file(
342
  processing_time = time.time() - start_time
343
  return JSONResponse(
344
  status_code=500,
345
- content={"message": "An error occurred during language identification", "details": error_details, "processing_time_seconds": processing_time})
 
346
 
347
  @app.post("/asr_languages")
348
  async def get_asr_languages(request: LanguageRequest, api_key: APIKey = Depends(get_api_key)):
@@ -392,12 +394,10 @@ async def get_tts_languages(request: LanguageRequest, api_key: APIKey = Depends(
392
  content={"message": "An error occurred while fetching TTS languages", "details": error_details, "processing_time_seconds": processing_time}
393
  )
394
 
395
- # If you want to add a health check endpoint
396
  @app.get("/health")
397
  async def health_check():
398
  return {"status": "ok"}
399
 
400
- # You might also want to add a root endpoint that provides basic API information
401
  @app.get("/")
402
  async def root():
403
  return {
 
20
  from botocore.exceptions import NoCredentialsError
21
  import time
22
  import tempfile
23
+ import magic
24
 
25
  # Import functions from other modules
26
  from asr import transcribe, ASR_LANGUAGES, ASR_SAMPLING_RATE
 
82
  temp_file_path = temp_file.name
83
 
84
  try:
85
+ # Log file info
86
+ file_info = magic.from_file(temp_file_path, mime=True)
87
+ logger.info(f"Received file of type: {file_info}")
88
+
89
+ # Try reading with soundfile first
90
+ try:
91
+ audio_array, sample_rate = sf.read(temp_file_path)
92
+ logger.info(f"Successfully read audio with soundfile. Shape: {audio_array.shape}, Sample rate: {sample_rate}")
93
+ return audio_array, sample_rate
94
+ except Exception as e:
95
+ logger.info(f"Could not read with soundfile: {str(e)}")
96
+
97
+ # Try reading as video
98
  try:
 
99
  video = VideoFileClip(temp_file_path)
100
  audio = video.audio
101
  if audio is not None:
 
102
  audio_array = audio.to_soundarray()
103
  sample_rate = audio.fps
104
+ audio_array = audio_array.mean(axis=1) if len(audio_array.shape) > 1 and audio_array.shape[1] > 1 else audio_array
 
 
 
 
 
105
  audio_array = audio_array.astype(np.float32)
106
  audio_array /= np.max(np.abs(audio_array))
 
107
  video.close()
108
+ logger.info(f"Successfully extracted audio from video. Shape: {audio_array.shape}, Sample rate: {sample_rate}")
109
  return audio_array, sample_rate
110
  else:
111
+ logger.info("Video file contains no audio")
112
+ except Exception as e:
113
+ logger.info(f"Could not read as video: {str(e)}")
114
+
115
+ # Try reading with pydub
116
+ try:
117
+ audio = AudioSegment.from_file(temp_file_path)
118
+ audio_array = np.array(audio.get_array_of_samples())
119
+ audio_array = audio_array.astype(np.float32) / (2**15 if audio.sample_width == 2 else 2**7)
120
+ audio_array = audio_array.reshape((-1, 2)).mean(axis=1) if audio.channels == 2 else audio_array
121
+ logger.info(f"Successfully read audio with pydub. Shape: {audio_array.shape}, Sample rate: {audio.frame_rate}")
122
+ return audio_array, audio.frame_rate
123
+ except Exception as e:
124
+ logger.info(f"Could not read with pydub: {str(e)}")
125
+
126
+ raise ValueError(f"Unsupported file format: {file_info}")
 
127
  finally:
 
128
  os.unlink(temp_file_path)
129
 
130
  @app.post("/transcribe")
 
343
  processing_time = time.time() - start_time
344
  return JSONResponse(
345
  status_code=500,
346
+ content={"message": "An error occurred during language identification", "details": error_details, "processing_time_seconds": processing_time}
347
+ )
348
 
349
  @app.post("/asr_languages")
350
  async def get_asr_languages(request: LanguageRequest, api_key: APIKey = Depends(get_api_key)):
 
394
  content={"message": "An error occurred while fetching TTS languages", "details": error_details, "processing_time_seconds": processing_time}
395
  )
396
 
 
397
  @app.get("/health")
398
  async def health_check():
399
  return {"status": "ok"}
400
 
 
401
  @app.get("/")
402
  async def root():
403
  return {