patrickvonplaten
committed on
Commit
•
c43348b
1
Parent(s):
9aa9e5f
add model
Browse files
- config.json +86 -0
- preprocessor_config.json +9 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
config.json
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_dropout": 0.0,
|
3 |
+
"apply_spec_augment": true,
|
4 |
+
"architectures": [
|
5 |
+
"Wav2Vec2ForCTC"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"classifier_proj_size": 256,
|
10 |
+
"codevector_dim": 768,
|
11 |
+
"contrastive_logits_temperature": 0.1,
|
12 |
+
"conv_bias": true,
|
13 |
+
"conv_dim": [
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512
|
21 |
+
],
|
22 |
+
"conv_kernel": [
|
23 |
+
10,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
3,
|
28 |
+
2,
|
29 |
+
2
|
30 |
+
],
|
31 |
+
"conv_stride": [
|
32 |
+
5,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2,
|
38 |
+
2
|
39 |
+
],
|
40 |
+
"ctc_loss_reduction": "sum",
|
41 |
+
"ctc_zero_infinity": false,
|
42 |
+
"diversity_loss_weight": 0.1,
|
43 |
+
"do_stable_layer_norm": true,
|
44 |
+
"eos_token_id": 2,
|
45 |
+
"feat_extract_activation": "gelu",
|
46 |
+
"feat_extract_dropout": 0.0,
|
47 |
+
"feat_extract_norm": "layer",
|
48 |
+
"feat_proj_dropout": 0.1,
|
49 |
+
"feat_quantizer_dropout": 0.0,
|
50 |
+
"final_dropout": 0.0,
|
51 |
+
"gradient_checkpointing": false,
|
52 |
+
"hidden_act": "gelu",
|
53 |
+
"hidden_dropout": 0.1,
|
54 |
+
"hidden_size": 1024,
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"intermediate_size": 4096,
|
57 |
+
"layer_norm_eps": 1e-05,
|
58 |
+
"layerdrop": 0.1,
|
59 |
+
"mask_channel_length": 10,
|
60 |
+
"mask_channel_min_space": 1,
|
61 |
+
"mask_channel_other": 0.0,
|
62 |
+
"mask_channel_prob": 0.0,
|
63 |
+
"mask_channel_selection": "static",
|
64 |
+
"mask_feature_length": 10,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_space": 1,
|
68 |
+
"mask_time_other": 0.0,
|
69 |
+
"mask_time_prob": 0.075,
|
70 |
+
"mask_time_selection": "static",
|
71 |
+
"model_type": "wav2vec2",
|
72 |
+
"num_attention_heads": 16,
|
73 |
+
"num_codevector_groups": 2,
|
74 |
+
"num_codevectors_per_group": 320,
|
75 |
+
"num_conv_pos_embedding_groups": 16,
|
76 |
+
"num_conv_pos_embeddings": 128,
|
77 |
+
"num_feat_extract_layers": 7,
|
78 |
+
"num_hidden_layers": 24,
|
79 |
+
"num_negatives": 100,
|
80 |
+
"pad_token_id": 0,
|
81 |
+
"proj_codevector_dim": 768,
|
82 |
+
"torch_dtype": "float32",
|
83 |
+
"transformers_version": "4.13.0.dev0",
|
84 |
+
"use_weighted_layer_sum": false,
|
85 |
+
"vocab_size": 392
|
86 |
+
}
|
preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"return_attention_mask": true,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3173bde9e9ce490fa0f989e413c42f25bc1820c020adc1e6b9b87025b3cfcc5e
|
3 |
+
size 1263535127
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 1, "<pad>": 0, "</s>": 2, "<unk>": 3, "n": 4, "s": 5, "t": 6, "ə": 7, "l": 8, "a": 9, "i": 10, "k": 11, "d": 12, "m": 13, "ɛ": 14, "ɾ": 15, "e": 16, "ɪ": 17, "p": 18, "o": 19, "ɐ": 20, "z": 21, "ð": 22, "f": 23, "j": 24, "v": 25, "b": 26, "ɹ": 27, "ʁ": 28, "ʊ": 29, "iː": 30, "r": 31, "w": 32, "ʌ": 33, "u": 34, "ɡ": 35, "æ": 36, "aɪ": 37, "ʃ": 38, "h": 39, "ɔ": 40, "ɑː": 41, "ŋ": 42, "ɚ": 43, "eɪ": 44, "β": 45, "uː": 46, "y": 47, "ɑ̃": 48, "oʊ": 49, "ᵻ": 50, "eː": 51, "θ": 52, "aʊ": 53, "ts": 54, "oː": 55, "ɔ̃": 56, "ɣ": 57, "ɜ": 58, "ɑ": 59, "dʒ": 60, "əl": 61, "x": 62, "ɜː": 63, "ç": 64, "ʒ": 65, "tʃ": 66, "ɔː": 67, "ɑːɹ": 68, "ɛ̃": 69, "ʎ": 70, "ɔːɹ": 71, "ʋ": 72, "aː": 73, "ɕ": 74, "œ": 75, "ø": 76, "oːɹ": 77, "ɲ": 78, "yː": 79, "ʔ": 80, "iə": 81, "i5": 82, "s.": 83, "tɕ": 84, "??": 85, "nʲ": 86, "ɛː": 87, "œ̃": 88, "ɭ": 89, "ɔø": 90, "ʑ": 91, "tʲ": 92, "ɨ": 93, "ɛɹ": 94, "ts.": 95, "rʲ": 96, "ɪɹ": 97, "ɭʲ": 98, "i.5": 99, "ɔɪ": 100, "q": 101, "sʲ": 102, "u5": 103, "ʊɹ": 104, "iɜ": 105, "a5": 106, "iɛ5": 107, "øː": 108, "ʕ": 109, "ja": 110, "əɜ": 111, "th": 112, "ɑ5": 113, "oɪ": 114, "dʲ": 115, "ə5": 116, "tɕh": 117, "ts.h": 118, "mʲ": 119, "ɯ": 120, "dʑ": 121, "vʲ": 122, "e̞": 123, "tʃʲ": 124, "ei5": 125, "o5": 126, "onɡ5": 127, "ɑu5": 128, "iɑ5": 129, "ai5": 130, "aɪɚ": 131, "kh": 132, "ə1": 133, "ʐ": 134, "i2": 135, "ʉ": 136, "ħ": 137, "t[": 138, "aɪə": 139, "ʲ": 140, "ju": 141, "ə2": 142, "u2": 143, "oɜ": 144, "pː": 145, "iɛɜ": 146, "ou5": 147, "y5": 148, "uɜ": 149, "tː": 150, "uo5": 151, "d[": 152, "uoɜ": 153, "tsh": 154, "ɑɜ": 155, "ɵ": 156, "i̪5": 157, "uei5": 158, "ɟ": 159, "aɜ": 160, "ɑɨ": 161, "i.ɜ": 162, "eʊ": 163, "o2": 164, "ɐ̃": 165, "ä": 166, "pʲ": 167, "kʲ": 168, "n̩": 169, "ɒ": 170, "ph": 171, "ɑu2": 172, "uɨ": 173, "əɪ": 174, "ɫ": 175, "ɬ": 176, "yɜ": 177, "bʲ": 178, "ɑ2": 179, "s̪": 180, "aiɜ": 181, "χ": 182, "ɐ̃ʊ̃": 183, "1": 184, "ə4": 185, "yæɜ": 186, "a2": 187, "ɨː": 188, "t̪": 189, "iouɜ": 190, "ũ": 191, "onɡɜ": 192, "aɨ": 193, 
"iɛ2": 194, "ɔɨ": 195, "ɑuɜ": 196, "o̞": 197, "ei2": 198, "iou2": 199, "c": 200, "kː": 201, "y2": 202, "ɖ": 203, "oe": 204, "dˤ": 205, "yɛɜ": 206, "əʊ": 207, "S": 208, "ɡʲ": 209, "onɡ2": 210, "u\"": 211, "eiɜ": 212, "ʈ": 213, "ɯᵝ": 214, "iou5": 215, "dZ": 216, "r̝̊": 217, "i.2": 218, "tS": 219, "s^": 220, "ʝ": 221, "yə5": 222, "iɑɜ": 223, "uə5": 224, "pf": 225, "ɨu": 226, "iɑ2": 227, "ou2": 228, "ər2": 229, "fʲ": 230, "ai2": 231, "r̝": 232, "uəɜ": 233, "ɳ": 234, "əɨ": 235, "ua5": 236, "uɪ": 237, "ɽ": 238, "bː": 239, "yu5": 240, "uo2": 241, "yɛ5": 242, "l̩": 243, "ɻ": 244, "ərɜ": 245, "ʂ": 246, "i̪2": 247, "ouɜ": 248, "uaɜ": 249, "a.": 250, "a.ː": 251, "yæ5": 252, "dː": 253, "r̩": 254, "ee": 255, "ɪu": 256, "ər5": 257, "i̪ɜ": 258, "æi": 259, "u:": 260, "i.ː": 261, "t^": 262, "o1": 263, "ɪ^": 264, "ai": 265, "ueiɜ": 266, "æː": 267, "ɛɪ": 268, "eə": 269, "i.": 270, "ɴ": 271, "ie": 272, "ua2": 273, "ɑ1": 274, "o4": 275, "tʃː": 276, "o:": 277, "ɑ:": 278, "u1": 279, "N": 280, "i̪1": 281, "au": 282, "yæ2": 283, "u.": 284, "qː": 285, "yəɜ": 286, "y:": 287, "kʰ": 288, "tʃʰ": 289, "iʊ": 290, "sx": 291, "õ": 292, "uo": 293, "tʰ": 294, "uai5": 295, "bʰ": 296, "u.ː": 297, "uə2": 298, "ʊə": 299, "d^": 300, "s̪ː": 301, "yiɜ": 302, "dʰ": 303, "r.": 304, "oe:": 305, "i1": 306, "ɟː": 307, "yu2": 308, "nʲʲ": 309, "i̪4": 310, "uei2": 311, "tsʲ": 312, "ɸ": 313, "ĩ": 314, "ɑ4": 315, "t̪ː": 316, "eɑ": 317, "u4": 318, "e:": 319, "tsː": 320, "ʈʰ": 321, "ɡʰ": 322, "ɯɯ": 323, "dʒʲ": 324, "ʂʲ": 325, "X": 326, "ɵː": 327, "uaiɜ": 328, "tɕʲ": 329, "ã": 330, "t^ː": 331, "ẽː": 332, "yɛ2": 333, "cː": 334, "i.1": 335, "ɛʊ": 336, "dˤdˤ": 337, "dʒː": 338, "i4": 339, "ɡː": 340, "yi": 341, "ɕʲ": 342, "ɟʰ": 343, "pʰ": 344, "dʑʲ": 345, "yuɜ": 346, "ua1": 347, "ua4": 348, "æiː": 349, "ɐɐ": 350, "ui": 351, "iou1": 352, "ʊː": 353, "a1": 354, "iou4": 355, "cʰ": 356, "iɛ1": 357, "yə2": 358, "ɖʰ": 359, "ẽ": 360, "ʒʲ": 361, "ää": 362, "ər4": 363, "iːː": 364, "ɪː": 365, "iɑ1": 366, "ər1": 367, "œː": 368, "øi": 
369, "ɪuː": 370, "cʰcʰ": 371, "əː1": 372, "iː1": 373, "ũ": 374, "kʰː": 375, "o̞o̞": 376, "xʲ": 377, "ou1": 378, "iɛ4": 379, "e̞e̞": 380, "y1": 381, "dzː": 382, "dʲʲ": 383, "dʰː": 384, "ɯᵝɯᵝ": 385, "lː": 386, "uo1": 387, "i.4": 388, "i:": 389, "yɛ5ʲ": 390, "a4": 391}
|