upload config

Files changed (5) hide show

config/decoder.yaml ADDED Viewed

+dim: 384  # shared width; referenced below as ${dim}
+decoder_config:
+  idim: ${dim}  # presumably resolved by the loader to the top-level `dim` (OmegaConf-style interpolation) - confirm
+  odim: ${dim}  # same reference as idim, so both carry the same value
+  hidden: 512
+  n_layer: 12
+  bn_dim: 128
+vq_config: null  # explicitly null in this file; config/dvae.yaml sets a mapping for the same key

config/dvae.yaml ADDED Viewed

+dim: 512  # shared width; referenced below as ${dim}
+decoder_config:
+  idim: ${dim}  # presumably resolved by the loader to the top-level `dim` - confirm
+  odim: ${dim}  # same reference as idim
+  n_layer: 12
+  bn_dim: 128
+  # NOTE(review): no `hidden` key here, unlike config/decoder.yaml - presumably a code-side default applies; confirm
+vq_config:
+  dim: 1024  # separate from the top-level `dim` (512)
+  levels: [5, 5, 5, 5]  # four entries, each 5
+  G: 2  # meaning not visible in this file - TODO confirm against the quantizer code
+  R: 2  # meaning not visible in this file - TODO confirm against the quantizer code

config/gpt.yaml ADDED Viewed

+num_audio_tokens: 626  # the same value is repeated under gpt_config below
+num_text_tokens: 21178
+gpt_config:
+  hidden_size: 768
+  intermediate_size: 3072  # 4 x hidden_size
+  num_attention_heads: 12
+  num_hidden_layers: 20
+  use_cache: false  # canonical lowercase boolean
+  max_position_embeddings: 4096
+  # attn_implementation: flash_attention_2
+  spk_emb_dim: 192
+  spk_KL: false  # canonical lowercase boolean
+  num_audio_tokens: 626  # duplicates the top-level num_audio_tokens; keep the two in sync
+  num_text_tokens: null  # NOTE(review): null here while the top-level value is 21178 - presumably filled in by the loader; confirm
+  num_vq: 4

config/path.yaml ADDED Viewed

+vocos_config_path: config/vocos.yaml  # all paths are relative; the base directory is not shown here - presumably the repository root, confirm
+vocos_ckpt_path: asset/Vocos.pt
+dvae_config_path: config/dvae.yaml
+dvae_ckpt_path: asset/DVAE.pt
+gpt_config_path: config/gpt.yaml
+gpt_ckpt_path: asset/GPT.pt
+decoder_config_path: config/decoder.yaml
+decoder_ckpt_path: asset/Decoder.pt
+tokenizer_path: asset/tokenizer.pt  # the only entry without a matching *_config_path

config/vocos.yaml ADDED Viewed

+feature_extractor:
+  class_path: vocos.feature_extractors.MelSpectrogramFeatures  # dotted path; presumably instantiated with init_args by the loader - confirm
+  init_args:
+    sample_rate: 24000
+    n_fft: 1024  # same value as head.init_args.n_fft
+    hop_length: 256  # same value as head.init_args.hop_length
+    n_mels: 100  # same value as backbone.init_args.input_channels
+    padding: center  # same value as head.init_args.padding
+backbone:
+  class_path: vocos.models.VocosBackbone
+  init_args:
+    input_channels: 100  # equals feature_extractor n_mels
+    dim: 512  # same value as head.init_args.dim
+    intermediate_dim: 1536  # 3 x dim
+    num_layers: 8
+head:
+  class_path: vocos.heads.ISTFTHead
+  init_args:
+    dim: 512  # equals backbone dim
+    n_fft: 1024  # equals feature_extractor n_fft
+    hop_length: 256  # equals feature_extractor hop_length
+    padding: center  # equals feature_extractor padding