jmaczan committed on
Commit
546425d
1 Parent(s): 00944e0

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +31 -0
tokenizer_config.json CHANGED
@@ -43,5 +43,36 @@
43
  "target_lang": null,
44
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
45
  "unk_token": "[UNK]",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  "word_delimiter_token": "|"
47
  }
 
43
  "target_lang": null,
44
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
45
  "unk_token": "[UNK]",
46
+ "vocab_dict": {
47
+ "[PAD]": 28,
48
+ "[UNK]": 27,
49
+ "a": 1,
50
+ "b": 2,
51
+ "c": 3,
52
+ "d": 4,
53
+ "e": 5,
54
+ "f": 6,
55
+ "g": 7,
56
+ "h": 8,
57
+ "i": 9,
58
+ "j": 10,
59
+ "k": 11,
60
+ "l": 12,
61
+ "m": 13,
62
+ "n": 14,
63
+ "o": 15,
64
+ "p": 16,
65
+ "q": 17,
66
+ "r": 18,
67
+ "s": 19,
68
+ "t": 20,
69
+ "u": 21,
70
+ "v": 22,
71
+ "w": 23,
72
+ "x": 24,
73
+ "y": 25,
74
+ "z": 26,
75
+ "|": 0
76
+ },
77
  "word_delimiter_token": "|"
78
  }