Afrinetwork7 committed on
Commit
118252d
1 Parent(s): d63d47a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -60,17 +60,20 @@ async def transcribe_chunked_audio(audio_file: UploadFile, task: str = "transcri
60
  raise HTTPException(status_code=400, detail=f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB.")
61
 
62
  try:
 
63
  with open(audio_file.filename, "rb") as f:
64
  inputs = f.read()
65
  except Exception as e:
66
  logger.error("Error reading audio file:", exc_info=True)
67
  raise HTTPException(status_code=500, detail="Error reading audio file")
68
 
 
69
  inputs = ffmpeg_read(inputs, pipeline.feature_extractor.sampling_rate)
70
  inputs = {"array": inputs, "sampling_rate": pipeline.feature_extractor.sampling_rate}
71
  logger.debug("Done loading audio file")
72
 
73
  try:
 
74
  text, runtime = tqdm_generate(inputs, task=task, return_timestamps=return_timestamps)
75
  except Exception as e:
76
  logger.error("Error transcribing audio:", exc_info=True)
@@ -97,6 +100,7 @@ async def transcribe_youtube(yt_url: str = Form(...), task: str = "transcribe",
97
  raise HTTPException(status_code=500, detail="Error downloading YouTube audio")
98
 
99
  try:
 
100
  with open(filepath, "rb") as f:
101
  inputs = f.read()
102
  except Exception as e:
@@ -108,6 +112,7 @@ async def transcribe_youtube(yt_url: str = Form(...), task: str = "transcribe",
108
  logger.debug("Done loading YouTube file")
109
 
110
  try:
 
111
  text, runtime = tqdm_generate(inputs, task=task, return_timestamps=return_timestamps)
112
  except Exception as e:
113
  logger.error("Error transcribing YouTube audio:", exc_info=True)
@@ -121,17 +126,19 @@ def tqdm_generate(inputs: dict, task: str, return_timestamps: bool):
121
  num_samples = len(all_chunk_start_idx)
122
  num_batches = math.ceil(num_samples / BATCH_SIZE)
123
 
 
124
  dataloader = pipeline.preprocess_batch(inputs, chunk_length_s=CHUNK_LENGTH_S, batch_size=BATCH_SIZE)
125
  model_outputs = []
126
  start_time = time.time()
127
  logger.debug("Transcribing...")
128
  # iterate over our chunked audio samples - always predict timestamps to reduce hallucinations
129
  for batch in dataloader:
 
130
  model_outputs.append(pipeline.forward(batch, batch_size=BATCH_SIZE, task=task, return_timestamps=True))
131
  runtime = time.time() - start_time
132
  logger.debug("Done transcription")
133
 
134
- logger.debug("Post-processing...")
135
  try:
136
  post_processed = pipeline.postprocess(model_outputs, return_timestamps=True)
137
  except Exception as e:
 
60
  raise HTTPException(status_code=400, detail=f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB.")
61
 
62
  try:
63
+ logger.debug(f"Opening audio file: {audio_file.filename}")
64
  with open(audio_file.filename, "rb") as f:
65
  inputs = f.read()
66
  except Exception as e:
67
  logger.error("Error reading audio file:", exc_info=True)
68
  raise HTTPException(status_code=500, detail="Error reading audio file")
69
 
70
+ logger.debug("Performing ffmpeg read on audio file")
71
  inputs = ffmpeg_read(inputs, pipeline.feature_extractor.sampling_rate)
72
  inputs = {"array": inputs, "sampling_rate": pipeline.feature_extractor.sampling_rate}
73
  logger.debug("Done loading audio file")
74
 
75
  try:
76
+ logger.debug("Calling tqdm_generate to transcribe audio")
77
  text, runtime = tqdm_generate(inputs, task=task, return_timestamps=return_timestamps)
78
  except Exception as e:
79
  logger.error("Error transcribing audio:", exc_info=True)
 
100
  raise HTTPException(status_code=500, detail="Error downloading YouTube audio")
101
 
102
  try:
103
+ logger.debug(f"Opening downloaded audio file: {filepath}")
104
  with open(filepath, "rb") as f:
105
  inputs = f.read()
106
  except Exception as e:
 
112
  logger.debug("Done loading YouTube file")
113
 
114
  try:
115
+ logger.debug("Calling tqdm_generate to transcribe YouTube audio")
116
  text, runtime = tqdm_generate(inputs, task=task, return_timestamps=return_timestamps)
117
  except Exception as e:
118
  logger.error("Error transcribing YouTube audio:", exc_info=True)
 
126
  num_samples = len(all_chunk_start_idx)
127
  num_batches = math.ceil(num_samples / BATCH_SIZE)
128
 
129
+ logger.debug("Preprocessing audio for inference")
130
  dataloader = pipeline.preprocess_batch(inputs, chunk_length_s=CHUNK_LENGTH_S, batch_size=BATCH_SIZE)
131
  model_outputs = []
132
  start_time = time.time()
133
  logger.debug("Transcribing...")
134
  # iterate over our chunked audio samples - always predict timestamps to reduce hallucinations
135
  for batch in dataloader:
136
+ logger.debug(f"Processing batch of {len(batch)} samples")
137
  model_outputs.append(pipeline.forward(batch, batch_size=BATCH_SIZE, task=task, return_timestamps=True))
138
  runtime = time.time() - start_time
139
  logger.debug("Done transcription")
140
 
141
+ logger.debug("Post-processing transcription results")
142
  try:
143
  post_processed = pipeline.postprocess(model_outputs, return_timestamps=True)
144
  except Exception as e: