Spaces:
Running
on
Zero
Running
on
Zero
mrfakename
committed on
Commit
•
5352edd
1
Parent(s):
6c15fbb
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
src/f5_tts/infer/utils_infer.py
CHANGED
@@ -182,17 +182,31 @@ def preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=print, device=
|
|
182 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
183 |
aseg = AudioSegment.from_file(ref_audio_orig)
|
184 |
|
185 |
-
|
|
|
186 |
non_silent_wave = AudioSegment.silent(duration=0)
|
187 |
for non_silent_seg in non_silent_segs:
|
188 |
-
if len(non_silent_wave) > 10000 and len(non_silent_wave + non_silent_seg) >
|
189 |
-
show_info("Audio is over
|
190 |
break
|
191 |
non_silent_wave += non_silent_seg
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
aseg = non_silent_wave
|
193 |
-
|
194 |
-
|
195 |
-
|
|
|
|
|
196 |
|
197 |
aseg.export(f.name, format="wav")
|
198 |
ref_audio = f.name
|
|
|
182 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
183 |
aseg = AudioSegment.from_file(ref_audio_orig)
|
184 |
|
185 |
+
# 1. try to find long silence for clipping
|
186 |
+
non_silent_segs = silence.split_on_silence(aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000)
|
187 |
non_silent_wave = AudioSegment.silent(duration=0)
|
188 |
for non_silent_seg in non_silent_segs:
|
189 |
+
if len(non_silent_wave) > 10000 and len(non_silent_wave + non_silent_seg) > 15000:
|
190 |
+
show_info("Audio is over 15s, clipping short.")
|
191 |
break
|
192 |
non_silent_wave += non_silent_seg
|
193 |
+
|
194 |
+
# 2. try to find short silence for clipping if 1. failed
|
195 |
+
if len(non_silent_wave) > 15000:
|
196 |
+
non_silent_segs = silence.split_on_silence(aseg, min_silence_len=200, silence_thresh=-45, keep_silence=1000)
|
197 |
+
non_silent_wave = AudioSegment.silent(duration=0)
|
198 |
+
for non_silent_seg in non_silent_segs:
|
199 |
+
if len(non_silent_wave) > 10000 and len(non_silent_wave + non_silent_seg) > 15000:
|
200 |
+
show_info("Audio is over 15s, clipping short.")
|
201 |
+
break
|
202 |
+
non_silent_wave += non_silent_seg
|
203 |
+
|
204 |
aseg = non_silent_wave
|
205 |
+
|
206 |
+
# 3. if no proper silence found for clipping
|
207 |
+
if len(aseg) > 15000:
|
208 |
+
aseg = aseg[:15000]
|
209 |
+
show_info("Audio is over 15s, clipping short.")
|
210 |
|
211 |
aseg.export(f.name, format="wav")
|
212 |
ref_audio = f.name
|