Commit
•
7452992
1
Parent(s):
568d9dd
Tokenizer fix (#1)
Browse files
- Tokenizer fix (2bc192c0384d35cb1821ba2f14f8d33f7135f02f)
Co-authored-by: Boris Malashenko <[email protected]>
- tokenizer.json +5 -5
tokenizer.json
CHANGED
@@ -85,8 +85,8 @@
|
|
85 |
"pre_tokenizer": {
|
86 |
"type": "Metaspace",
|
87 |
"replacement": "▁",
|
88 |
-
"
|
89 |
-
"
|
90 |
},
|
91 |
"post_processor": {
|
92 |
"type": "TemplateProcessing",
|
@@ -172,8 +172,8 @@
|
|
172 |
"decoder": {
|
173 |
"type": "Metaspace",
|
174 |
"replacement": "▁",
|
175 |
-
"
|
176 |
-
"
|
177 |
},
|
178 |
"model": {
|
179 |
"type": "Unigram",
|
@@ -184846,4 +184846,4 @@
|
|
184846 |
],
|
184847 |
"byte_fallback": false
|
184848 |
}
|
184849 |
-
}
|
|
|
85 |
"pre_tokenizer": {
|
86 |
"type": "Metaspace",
|
87 |
"replacement": "▁",
|
88 |
+
"add_prefix_space": true,
|
89 |
+
"prepend_scheme": "always"
|
90 |
},
|
91 |
"post_processor": {
|
92 |
"type": "TemplateProcessing",
|
|
|
172 |
"decoder": {
|
173 |
"type": "Metaspace",
|
174 |
"replacement": "▁",
|
175 |
+
"add_prefix_space": true,
|
176 |
+
"prepend_scheme": "always"
|
177 |
},
|
178 |
"model": {
|
179 |
"type": "Unigram",
|
|
|
184846 |
],
|
184847 |
"byte_fallback": false
|
184848 |
}
|
184849 |
+
}
|