DrishtiSharma committed on
Commit
0841d8d
1 Parent(s): 7c8c8ad

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ଁ": 1, "ଂ": 2, "ଃ": 3, "ଅ": 4, "ଆ": 5, "ଇ": 6, "ଈ": 7, "ଉ": 8, "ଊ": 9, "ଏ": 10, "ଓ": 11, "କ": 12, "ଖ": 13, "ଗ": 14, "ଘ": 15, "ଙ": 16, "ଚ": 17, "ଛ": 18, "ଜ": 19, "ଝ": 20, "ଞ": 21, "ଟ": 22, "ଠ": 23, "ଡ": 24, "ଢ": 25, "ଣ": 26, "ତ": 27, "ଥ": 28, "ଦ": 29, "ଧ": 30, "ନ": 31, "ପ": 32, "ଫ": 33, "ବ": 34, "ଭ": 35, "ମ": 36, "ଯ": 37, "ର": 38, "ଲ": 39, "ଳ": 40, "ଵ": 41, "ଶ": 42, "ଷ": 43, "ସ": 44, "ହ": 45, "଼": 46, "ା": 47, "ି": 48, "ୀ": 49, "ୁ": 50, "ୂ": 51, "ୃ": 52, "େ": 53, "ୈ": 54, "ୋ": 55, "ୌ": 56, "୍": 57, "ୟ": 58, "ୱ": 59, "–": 60, "|": 0, "[UNK]": 61, "[PAD]": 62}