wav2vec2-base-960h-demo-colab / tokenizer_config.json
Yoru1010's picture
Upload tokenizer
6c04fff
raw
history blame contribute delete
No virus
6.07 kB
{
"added_tokens_decoder": {
"1": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "[SEP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "[CLS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "[MASK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "q",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"24": {
"content": "x",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"27": {
"content": "[unknown]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"28": {
"content": "[pad]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"82": {
"content": "j",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"170": {
"content": "v",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"253": {
"content": "f",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"301": {
"content": "p",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"329": {
"content": "'",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"332": {
"content": "b",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"345": {
"content": "k",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"413": {
"content": "c",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"544": {
"content": "g",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"575": {
"content": "m",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"629": {
"content": "w",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"646": {
"content": "d",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"816": {
"content": "u",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"827": {
"content": "y",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"876": {
"content": "l",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"979": {
"content": "r",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1135": {
"content": "s",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1282": {
"content": "n",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1369": {
"content": "h",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1437": {
"content": "i",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1666": {
"content": "a",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1929": {
"content": "t",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"2017": {
"content": "o",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"2333": {
"content": "e",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"4770": {
"content": "|",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"clean_up_tokenization_spaces": true,
"cls_token": "[CLS]",
"do_basic_tokenize": true,
"do_lower_case": true,
"mask_token": "[MASK]",
"model_max_length": 1000000000000000019884624838656,
"never_split": null,
"pad_token": "[pad]",
"sep_token": "[SEP]",
"strip_accents": null,
"tokenize_chinese_chars": true,
"tokenizer_class": "BertTokenizer",
"unk_token": "[unknown]"
}