mrfakename
committed on
Commit
•
4064aae
1
Parent(s):
14d6715
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
- app.py +2 -3
- inference-cli.py +2 -3
- model/utils.py +2 -3
app.py
CHANGED
@@ -158,9 +158,8 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
|
|
158 |
|
159 |
# Calculate duration
|
160 |
ref_audio_len = audio.shape[-1] // hop_length
|
161 |
-
|
162 |
-
|
163 |
-
gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
|
164 |
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
165 |
|
166 |
# inference
|
|
|
158 |
|
159 |
# Calculate duration
|
160 |
ref_audio_len = audio.shape[-1] // hop_length
|
161 |
+
ref_text_len = len(ref_text.encode('utf-8'))
|
162 |
+
gen_text_len = len(gen_text.encode('utf-8'))
|
|
|
163 |
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
164 |
|
165 |
# inference
|
inference-cli.py
CHANGED
@@ -250,9 +250,8 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, model,ckpt_file,file_voca
|
|
250 |
|
251 |
# Calculate duration
|
252 |
ref_audio_len = audio.shape[-1] // hop_length
|
253 |
-
|
254 |
-
|
255 |
-
gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
|
256 |
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
257 |
|
258 |
# inference
|
|
|
250 |
|
251 |
# Calculate duration
|
252 |
ref_audio_len = audio.shape[-1] // hop_length
|
253 |
+
ref_text_len = len(ref_text.encode('utf-8'))
|
254 |
+
gen_text_len = len(gen_text.encode('utf-8'))
|
|
|
255 |
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
256 |
|
257 |
# inference
|
model/utils.py
CHANGED
@@ -296,9 +296,8 @@ def get_inference_prompt(
|
|
296 |
# # test vocoder resynthesis
|
297 |
# ref_audio = gt_audio
|
298 |
else:
|
299 |
-
|
300 |
-
|
301 |
-
gen_text_len = len(gt_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gt_text))
|
302 |
total_mel_len = ref_mel_len + int(ref_mel_len / ref_text_len * gen_text_len / speed)
|
303 |
|
304 |
# to mel spectrogram
|
|
|
296 |
# # test vocoder resynthesis
|
297 |
# ref_audio = gt_audio
|
298 |
else:
|
299 |
+
ref_text_len = len(prompt_text.encode('utf-8'))
|
300 |
+
gen_text_len = len(gt_text.encode('utf-8'))
|
|
|
301 |
total_mel_len = ref_mel_len + int(ref_mel_len / ref_text_len * gen_text_len / speed)
|
302 |
|
303 |
# to mel spectrogram
|