mrfakename committed
Commit 3f5b3b4
Parent: 3536c5f

Sync from GitHub repo


This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space via the GitHub repo.

Files changed (1):
src/f5_tts/infer/infer_cli.py  +10 -3
src/f5_tts/infer/infer_cli.py CHANGED
@@ -75,6 +75,12 @@ parser.add_argument(
     action="store_true",
     help="load vocoder from local. Default: ../checkpoints/charactr/vocos-mel-24khz",
 )
+parser.add_argument(
+    "--speed",
+    type=float,
+    default=1.0,
+    help="Adjust the speed of the audio generation (default: 1.0)",
+)
 args = parser.parse_args()
 
 config = tomli.load(open(args.config, "rb"))
@@ -102,6 +108,7 @@ model = args.model if args.model else config["model"]
 ckpt_file = args.ckpt_file if args.ckpt_file else ""
 vocab_file = args.vocab_file if args.vocab_file else ""
 remove_silence = args.remove_silence if args.remove_silence else config["remove_silence"]
+speed = args.speed
 wave_path = Path(output_dir) / "infer_cli_out.wav"
 # spectrogram_path = Path(output_dir) / "infer_cli_out.png"
 vocos_local_path = "../checkpoints/charactr/vocos-mel-24khz"
@@ -134,7 +141,7 @@ print(f"Using {model}...")
 ema_model = load_model(model_cls, model_cfg, ckpt_file, vocab_file)
 
 
-def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence):
+def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence, speed):
     main_voice = {"ref_audio": ref_audio, "ref_text": ref_text}
     if "voices" not in config:
         voices = {"main": main_voice}
@@ -168,7 +175,7 @@ def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence):
         ref_audio = voices[voice]["ref_audio"]
         ref_text = voices[voice]["ref_text"]
         print(f"Voice: {voice}")
-        audio, final_sample_rate, spectragram = infer_process(ref_audio, ref_text, gen_text, model_obj)
+        audio, final_sample_rate, spectragram = infer_process(ref_audio, ref_text, gen_text, model_obj, speed=speed)
         generated_audio_segments.append(audio)
 
     if generated_audio_segments:
@@ -186,7 +193,7 @@ def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence):
 
 
 def main():
-    main_process(ref_audio, ref_text, gen_text, ema_model, remove_silence)
+    main_process(ref_audio, ref_text, gen_text, ema_model, remove_silence, speed)
 
 
 if __name__ == "__main__":
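
With this change, the CLI exposes a speed control that is passed through main_process into infer_process. As a rough usage sketch (the config path is a placeholder; only --config and --speed appear in this diff, any other flags come from the existing parser):

    python src/f5_tts/infer/infer_cli.py --config path/to/config.toml --speed 0.9

The new argument defaults to 1.0 and is read only from the command line (speed = args.speed, with no config-file fallback), so existing invocations keep their current behavior.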