justinthelaw
committed on
Commit
•
edafd33
1
Parent(s):
fc7d4b2
modify tokenizer.json
Browse files - tokenizer.json +10 -9
tokenizer.json
CHANGED
@@ -205,6 +205,7 @@
|
|
205 |
"end_of_word_suffix": null,
|
206 |
"fuse_unk": true,
|
207 |
"byte_fallback": true,
|
|
|
208 |
"vocab": {
|
209 |
"<unk>": 0,
|
210 |
"<s>": 1,
|
@@ -8861,7 +8862,7 @@
|
|
8861 |
"\"?": 8652,
|
8862 |
"▁>>>": 8653,
|
8863 |
"Que": 8654,
|
8864 |
-
"
|
8865 |
"▁plain": 8656,
|
8866 |
"ativa": 8657,
|
8867 |
"ocker": 8658,
|
@@ -18097,7 +18098,7 @@
|
|
18097 |
"▁farm": 17888,
|
18098 |
"▁rôle": 17889,
|
18099 |
"▁статьи": 17890,
|
18100 |
-
"
|
18101 |
"subfigure": 17892,
|
18102 |
"èces": 17893,
|
18103 |
"ziel": 17894,
|
@@ -20452,7 +20453,7 @@
|
|
20452 |
"▁gcc": 20243,
|
20453 |
"▁scène": 20244,
|
20454 |
"Navigation": 20245,
|
20455 |
-
"▁
|
20456 |
"▁кан": 20247,
|
20457 |
"▁towns": 20248,
|
20458 |
"Username": 20249,
|
@@ -30287,7 +30288,7 @@
|
|
30287 |
"æ": 30078,
|
30288 |
"њ": 30079,
|
30289 |
" ": 30080,
|
30290 |
-
"
|
30291 |
"Э": 30082,
|
30292 |
"ë": 30083,
|
30293 |
"õ": 30084,
|
@@ -31559,7 +31560,7 @@
|
|
31559 |
"अ": 31350,
|
31560 |
"╔": 31351,
|
31561 |
"无": 31352,
|
31562 |
-
"
": 31353,
|
31563 |
"은": 31354,
|
31564 |
"ʷ": 31355,
|
31565 |
"那": 31356,
|
@@ -51125,7 +51126,7 @@
|
|
51125 |
"▁>> >",
|
51126 |
"Qu e",
|
51127 |
"Q ue",
|
51128 |
-
"
|
51129 |
"▁p lain",
|
51130 |
"▁pl ain",
|
51131 |
"▁pla in",
|
@@ -70350,7 +70351,7 @@
|
|
70350 |
"▁fa rm",
|
70351 |
"▁r ôle",
|
70352 |
"▁стать и",
|
70353 |
-
"
|
70354 |
"sub figure",
|
70355 |
"èce s",
|
70356 |
"è ces",
|
@@ -75013,7 +75014,7 @@
|
|
75013 |
"▁ gcc",
|
75014 |
"▁sc ène",
|
75015 |
"N avigation",
|
75016 |
-
"▁
|
75017 |
"▁к ан",
|
75018 |
"▁ка н",
|
75019 |
"▁ кан",
|
@@ -93459,4 +93460,4 @@
|
|
93459 |
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
|
93460 |
]
|
93461 |
}
|
93462 |
-
}
|
|
|
205 |
"end_of_word_suffix": null,
|
206 |
"fuse_unk": true,
|
207 |
"byte_fallback": true,
|
208 |
+
"ignore_merges": false,
|
209 |
"vocab": {
|
210 |
"<unk>": 0,
|
211 |
"<s>": 1,
|
|
|
8862 |
"\"?": 8652,
|
8863 |
"▁>>>": 8653,
|
8864 |
"Que": 8654,
|
8865 |
+
" ": 8655,
|
8866 |
"▁plain": 8656,
|
8867 |
"ativa": 8657,
|
8868 |
"ocker": 8658,
|
|
|
18098 |
"▁farm": 17888,
|
18099 |
"▁rôle": 17889,
|
18100 |
"▁статьи": 17890,
|
18101 |
+
" ": 17891,
|
18102 |
"subfigure": 17892,
|
18103 |
"èces": 17893,
|
18104 |
"ziel": 17894,
|
|
|
20453 |
"▁gcc": 20243,
|
20454 |
"▁scène": 20244,
|
20455 |
"Navigation": 20245,
|
20456 |
+
"▁ ": 20246,
|
20457 |
"▁кан": 20247,
|
20458 |
"▁towns": 20248,
|
20459 |
"Username": 20249,
|
|
|
30288 |
"æ": 30078,
|
30289 |
"њ": 30079,
|
30290 |
" ": 30080,
|
30291 |
+
" ": 30081,
|
30292 |
"Э": 30082,
|
30293 |
"ë": 30083,
|
30294 |
"õ": 30084,
|
|
|
31560 |
"अ": 31350,
|
31561 |
"╔": 31351,
|
31562 |
"无": 31352,
|
31563 |
+
"
": 31353,
|
31564 |
"은": 31354,
|
31565 |
"ʷ": 31355,
|
31566 |
"那": 31356,
|
|
|
51126 |
"▁>> >",
|
51127 |
"Qu e",
|
51128 |
"Q ue",
|
51129 |
+
" ",
|
51130 |
"▁p lain",
|
51131 |
"▁pl ain",
|
51132 |
"▁pla in",
|
|
|
70351 |
"▁fa rm",
|
70352 |
"▁r ôle",
|
70353 |
"▁стать и",
|
70354 |
+
" ",
|
70355 |
"sub figure",
|
70356 |
"èce s",
|
70357 |
"è ces",
|
|
|
75014 |
"▁ gcc",
|
75015 |
"▁sc ène",
|
75016 |
"N avigation",
|
75017 |
+
"▁ ",
|
75018 |
"▁к ан",
|
75019 |
"▁ка н",
|
75020 |
"▁ кан",
|
|
|
93460 |
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
|
93461 |
]
|
93462 |
}
|
93463 |
+
}
|