# Page-viewer residue captured with this file; not part of the configuration.
#   Spaces: Sleeping
#   File size: 2,613 Bytes
#   778a50b (presumably the revision id shown by the viewer)
# The viewer's line-number gutter (1-88) carried no content and is omitted.
# Loss terms. Each entry carries its own `enable` switch and `params` mapping,
# so they must be nested under the loss name; in a flat layout the repeated
# `enable` / `params` keys would be duplicates and `Loss` itself would be null.
Loss:
  MelReconLoss:
    enable: true
    params:
      loss_type: mae  # presumably mean absolute error - confirm with consumer
  ProsodyReconLoss:
    enable: true
    params:
      loss_type: mae
# Model definition. `KanTtsSAMBERT` holds three sibling sections:
#   optimizer - optimizer `type` plus its `params`
#   params    - network hyper-parameters
#   scheduler - LR scheduler `type` plus its `params`
# The nesting is required: `params` occurs three times and `type` twice, which
# would be duplicate keys (last one wins in most parsers) if left flat.
Model:
  KanTtsSAMBERT:
    optimizer:
      params:
        betas: [0.9, 0.98]
        eps: 1.0e-09
        lr: 0.001
        weight_decay: 0.0
      type: Adam
    params:
      # Feature switches.
      MAS: false
      NSF: true
      SE: true
      # Decoder.
      decoder_attention_dropout: 0.1
      decoder_dropout: 0.1
      decoder_ffn_inner_dim: 1024
      decoder_num_heads: 8
      decoder_num_layers: 12
      decoder_num_units: 128
      decoder_prenet_units: [256, 256]
      decoder_relu_dropout: 0.1
      # Duration predictor.
      dur_pred_lstm_units: 128
      dur_pred_prenet_units: [128, 128]
      # Embeddings.
      embedding_dim: 512
      emotion_units: 32
      # Encoder.
      encoder_attention_dropout: 0.1
      encoder_dropout: 0.1
      encoder_ffn_inner_dim: 1024
      encoder_num_heads: 8
      encoder_num_layers: 8
      encoder_num_units: 128
      encoder_projection_units: 32
      encoder_relu_dropout: 0.1
      max_len: 800
      # NSF settings (only meaningful while `NSF` above is true - presumably).
      nsf_f0_global_maximum: 730.0
      nsf_f0_global_minimum: 30.0
      nsf_norm_type: global
      # NOTE(review): differs from audio_config.n_mels (80); presumably the two
      # extra channels are NSF-related features - confirm before changing.
      num_mels: 82
      outputs_per_step: 3
      # Post-net.
      postnet_dropout: 0.1
      postnet_ffn_inner_dim: 512
      postnet_filter_size: 41
      postnet_fsmn_num_layers: 4
      postnet_lstm_units: 128
      postnet_num_memory_units: 256
      postnet_shift: 17
      # Predictor.
      predictor_dropout: 0.1
      predictor_ffn_inner_dim: 256
      predictor_filter_size: 41
      predictor_fsmn_num_layers: 3
      predictor_lstm_units: 128
      predictor_num_memory_units: 128
      predictor_shift: 0
      speaker_units: 192
    scheduler:
      params:
        warmup_steps: 4000
      type: NoamLR
# Top-level run settings (data loading, audio front-end, logging, checkpoints).
allow_cache: false

# Audio feature-extraction settings. Written in block style: the original flow
# mapping spanned several lines with continuation lines at column 0.
audio_config:
  fmax: 8000.0
  fmin: 0.0
  hop_length: 200
  max_norm: 1.0
  min_level_db: -100.0
  n_fft: 2048
  n_mels: 80
  norm_type: mean_std
  num_workers: 16
  phone_level_feature: true
  preemphasize: false
  ref_level_db: 20
  sampling_rate: 16000
  symmetric: false
  trim_silence: true
  trim_silence_threshold_db: 60
  wav_normalize: true
  win_length: 1000

batch_size: 32
create_time: '2023-07-08 01:06:41'
# Far larger than train_max_steps, so presumably periodic evaluation never
# triggers during a run - confirm against the trainer.
eval_interval_steps: 10000000000000000
# Quoted so the hash stays a string regardless of its characters.
git_revision_hash: 'd16755444c9baf23348213211a5ed9035458ecf0'
grad_norm: 1.0

linguistic_unit:
  cleaners: english_cleaners
  # Comma-separated list kept as one string, exactly as in the original.
  lfeat_type_list: 'sy,tone,syllable_flag,word_segment,emo_category,speaker_category'
  speaker_list: F7

log_interval: 10
log_interval_steps: 50
model_type: sambert
# Quoted: a version identifier, never a number.
modelscope_version: '1.7.1'
num_save_intermediate_results: 4
num_workers: 4
pin_memory: false
remove_short_samples: false
save_interval_steps: 500
train_max_steps: 2400502
train_steps: 502
# End of configuration (a stray table delimiter from the page viewer was here).