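# NOTE(review): "Spaces: Sleeping" appears to be a hosting-page status banner
# captured together with this config; it is not a configuration key.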
# Loss terms used for training. Both reconstruction losses are enabled and
# use loss_type "mae".
# NOTE(review): nesting was restored from a flattened dump (keys are in
# alphabetical order at each level) — confirm against the original config.
Loss:
  MelReconLoss:
    enable: true
    params: {loss_type: mae}
  ProsodyReconLoss:
    enable: true
    params: {loss_type: mae}
# Model section: optimizer, network hyperparameters and LR scheduler for
# KanTtsSAMBERT.
# NOTE(review): nesting was restored from a flattened dump (keys are in
# alphabetical order at each level: optimizer < params < scheduler) — confirm
# against the original config.
Model:
  KanTtsSAMBERT:
    # Optimizer type and its constructor arguments.
    optimizer:
      params:
        betas: [0.9, 0.98]
        eps: 1.0e-09
        lr: 0.001
        weight_decay: 0.0
      type: Adam
    # Network hyperparameters.
    params:
      MAS: false
      NSF: true
      SE: true
      decoder_attention_dropout: 0.1
      decoder_dropout: 0.1
      decoder_ffn_inner_dim: 1024
      decoder_num_heads: 8
      decoder_num_layers: 12
      decoder_num_units: 128
      decoder_prenet_units: [256, 256]
      decoder_relu_dropout: 0.1
      dur_pred_lstm_units: 128
      dur_pred_prenet_units: [128, 128]
      embedding_dim: 512
      emotion_units: 32
      encoder_attention_dropout: 0.1
      encoder_dropout: 0.1
      encoder_ffn_inner_dim: 1024
      encoder_num_heads: 8
      encoder_num_layers: 8
      encoder_num_units: 128
      encoder_projection_units: 32
      encoder_relu_dropout: 0.1
      max_len: 800
      nsf_f0_global_maximum: 730.0
      nsf_f0_global_minimum: 30.0
      nsf_norm_type: global
      # NOTE(review): audio_config.n_mels is 80 while num_mels is 82 —
      # presumably the 2 extra channels are NSF features; confirm.
      num_mels: 82
      outputs_per_step: 3
      postnet_dropout: 0.1
      postnet_ffn_inner_dim: 512
      postnet_filter_size: 41
      postnet_fsmn_num_layers: 4
      postnet_lstm_units: 128
      postnet_num_memory_units: 256
      postnet_shift: 17
      predictor_dropout: 0.1
      predictor_ffn_inner_dim: 256
      predictor_filter_size: 41
      predictor_fsmn_num_layers: 3
      predictor_lstm_units: 128
      predictor_num_memory_units: 128
      predictor_shift: 0
      speaker_units: 192
    # Learning-rate scheduler type and its arguments.
    scheduler:
      params: {warmup_steps: 4000}
      type: NoamLR
# Top-level run settings: data loading, audio feature extraction, logging,
# checkpointing and run metadata.
allow_cache: false
# Audio feature-extraction settings (block style; same keys and values as the
# original flow mapping).
audio_config:
  fmax: 8000.0
  fmin: 0.0
  hop_length: 200
  max_norm: 1.0
  min_level_db: -100.0
  n_fft: 2048
  n_mels: 80
  norm_type: mean_std
  # Distinct from the top-level num_workers further down.
  num_workers: 16
  phone_level_feature: true
  preemphasize: false
  ref_level_db: 20
  sampling_rate: 16000
  symmetric: false
  trim_silence: true
  trim_silence_threshold_db: 60
  wav_normalize: true
  win_length: 1000
batch_size: 32
create_time: '2023-07-08 01:06:41'
# NOTE(review): far larger than train_max_steps, so presumably periodic
# evaluation never triggers — confirm this is intentional.
eval_interval_steps: 10000000000000000
# Quoted so the hash is always read as a string.
git_revision_hash: 'd16755444c9baf23348213211a5ed9035458ecf0'
grad_norm: 1.0
linguistic_unit:
  cleaners: english_cleaners
  # Comma-separated list kept as a single string, as in the original.
  lfeat_type_list: 'sy,tone,syllable_flag,word_segment,emo_category,speaker_category'
  speaker_list: F7
log_interval: 10
log_interval_steps: 50
model_type: sambert
# Quoted so the version is always read as a string.
modelscope_version: '1.7.1'
num_save_intermediate_results: 4
num_workers: 4
pin_memory: false
remove_short_samples: false
save_interval_steps: 500
# NOTE(review): train_max_steps ends in the same 502 as train_steps —
# presumably a resume offset plus train_steps; confirm.
train_max_steps: 2400502
train_steps: 502