Niciu committed on
Commit
3d1b9ed
1 Parent(s): 5d8e3ed

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ợ": 0, "ạ": 1, "ẻ": 3, "ý": 4, "ỗ": 5, "ằ": 6, "ữ": 7, "x": 8, "ặ": 9, "ừ": 10, "ờ": 11, "í": 12, "s": 13, "ẽ": 14, "4": 15, "q": 16, "ỳ": 17, "ẵ": 18, "õ": 19, "ộ": 20, "e": 21, "p": 22, "ể": 23, "ơ": 24, "ổ": 25, "b": 26, "ầ": 27, "à": 28, "ỉ": 29, "ó": 30, "ớ": 31, "ễ": 32, "n": 33, "ở": 34, "m": 35, "ẳ": 36, "d": 37, "ã": 38, "r": 39, "c": 40, "ọ": 41, "ậ": 42, "ỹ": 43, "ù": 44, "ă": 45, "â": 46, "ứ": 47, "ủ": 48, "é": 49, "ắ": 50, "ĩ": 51, "ệ": 52, "á": 53, "ú": 54, "ự": 55, "ư": 56, "ụ": 57, "ẫ": 58, "ề": 59, "o": 60, "ố": 61, "y": 62, "ị": 63, "ẩ": 64, "ò": 65, "ử": 66, "ồ": 67, "ỵ": 68, "ỡ": 69, "t": 70, "ả": 71, "l": 72, "ỷ": 73, "v": 74, "ẹ": 75, "k": 76, "ê": 77, "è": 78, "ấ": 79, "ô": 80, "ũ": 81, "ế": 82, "a": 83, "ì": 84, "g": 85, "ỏ": 86, "h": 87, "i": 88, "u": 89, "đ": 90, "|": 2, "[UNK]": 91, "[PAD]": 92}