pother
/

mms-tts-finetune-tha

Inference Endpoints

Model card | Files and versions | Community

pother committed on Aug 11

Commit

afa40e0

•

1 Parent(s): c643b36

Upload model

Files changed (2) hide show

config.json +4 -3
model.safetensors +2 -2

config.json CHANGED Viewed

@@ -1,7 +1,8 @@
 {
   "activation_dropout": 0.1,
   "architectures": [
-    "VitsModelForPreTraining"
   ],
   "attention_dropout": 0.1,
   "depth_separable_channels": 2,
@@ -51,7 +52,7 @@
   "noise_scale_duration": 0.8,
   "num_attention_heads": 2,
   "num_hidden_layers": 6,
-  "num_speakers": 1,
   "posterior_encoder_num_wavenet_layers": 16,
   "prior_encoder_num_flows": 4,
   "prior_encoder_num_wavenet_layers": 4,
@@ -79,7 +80,7 @@
   ],
   "sampling_rate": 16000,
   "segment_size": 8192,
-  "speaker_embedding_size": 0,
   "speaking_rate": 1.0,
   "spectrogram_bins": 513,
   "torch_dtype": "float32",

 {
+  "_name_or_path": "./tmp/vits_finetuned_validate",
   "activation_dropout": 0.1,
   "architectures": [
+    "VitsModel"
   ],
   "attention_dropout": 0.1,
   "depth_separable_channels": 2,
   "noise_scale_duration": 0.8,
   "num_attention_heads": 2,
   "num_hidden_layers": 6,
+  "num_speakers": 6,
   "posterior_encoder_num_wavenet_layers": 16,
   "prior_encoder_num_flows": 4,
   "prior_encoder_num_wavenet_layers": 4,
   ],
   "sampling_rate": 16000,
   "segment_size": 8192,
+  "speaker_embedding_size": 256,
   "speaking_rate": 1.0,
   "spectrogram_bins": 513,
   "torch_dtype": "float32",

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cabfa3c771f470cc2207be3ce2432b587fdf219515b4fd0ecab6c1648fab6e3c
-size 332186056

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b340205c95036fd44c1c4091d6a62f003dc804a65846ba8c656cd27614af277
+size 158669560