mrfakename committed on
Commit
5352edd
1 Parent(s): 6c15fbb

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (1) hide show
  1. src/f5_tts/infer/utils_infer.py +20 -6
src/f5_tts/infer/utils_infer.py CHANGED
@@ -182,17 +182,31 @@ def preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=print, device=
182
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
183
  aseg = AudioSegment.from_file(ref_audio_orig)
184
 
185
- non_silent_segs = silence.split_on_silence(aseg, min_silence_len=500, silence_thresh=-50, keep_silence=1000)
 
186
  non_silent_wave = AudioSegment.silent(duration=0)
187
  for non_silent_seg in non_silent_segs:
188
- if len(non_silent_wave) > 10000 and len(non_silent_wave + non_silent_seg) > 18000:
189
- show_info("Audio is over 18s, clipping short.")
190
  break
191
  non_silent_wave += non_silent_seg
 
 
 
 
 
 
 
 
 
 
 
192
  aseg = non_silent_wave
193
- if len(aseg) > 18000: # if no proper silence found for clipping
194
- aseg = aseg[:18000]
195
- show_info("Audio is over 18s, clipping short.")
 
 
196
 
197
  aseg.export(f.name, format="wav")
198
  ref_audio = f.name
 
182
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
183
  aseg = AudioSegment.from_file(ref_audio_orig)
184
 
185
+ # 1. try to find long silence for clipping
186
+ non_silent_segs = silence.split_on_silence(aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000)
187
  non_silent_wave = AudioSegment.silent(duration=0)
188
  for non_silent_seg in non_silent_segs:
189
+ if len(non_silent_wave) > 10000 and len(non_silent_wave + non_silent_seg) > 15000:
190
+ show_info("Audio is over 15s, clipping short.")
191
  break
192
  non_silent_wave += non_silent_seg
193
+
194
+ # 2. try to find short silence for clipping if 1. failed
195
+ if len(non_silent_wave) > 15000:
196
+ non_silent_segs = silence.split_on_silence(aseg, min_silence_len=200, silence_thresh=-45, keep_silence=1000)
197
+ non_silent_wave = AudioSegment.silent(duration=0)
198
+ for non_silent_seg in non_silent_segs:
199
+ if len(non_silent_wave) > 10000 and len(non_silent_wave + non_silent_seg) > 15000:
200
+ show_info("Audio is over 15s, clipping short.")
201
+ break
202
+ non_silent_wave += non_silent_seg
203
+
204
  aseg = non_silent_wave
205
+
206
+ # 3. if no proper silence found for clipping
207
+ if len(aseg) > 15000:
208
+ aseg = aseg[:15000]
209
+ show_info("Audio is over 15s, clipping short.")
210
 
211
  aseg.export(f.name, format="wav")
212
  ref_audio = f.name