Nicolas Iglesias commited on
Commit
764ec8e
1 Parent(s): 4d45f57

Upload 8 files

Browse files
Files changed (5) hide show
  1. added_tokens.json +3 -143
  2. config.json +3 -3
  3. special_tokens_map.json +3 -21
  4. tokenizer.json +4 -1264
  5. tokenizer_config.json +37 -28
added_tokens.json CHANGED
@@ -1,145 +1,5 @@
1
  {
2
- "<|extratoken_100|>": 50356,
3
- "<|extratoken_101|>": 50357,
4
- "<|extratoken_102|>": 50358,
5
- "<|extratoken_103|>": 50359,
6
- "<|extratoken_104|>": 50360,
7
- "<|extratoken_105|>": 50361,
8
- "<|extratoken_106|>": 50362,
9
- "<|extratoken_107|>": 50363,
10
- "<|extratoken_108|>": 50364,
11
- "<|extratoken_109|>": 50365,
12
- "<|extratoken_10|>": 50266,
13
- "<|extratoken_110|>": 50366,
14
- "<|extratoken_111|>": 50367,
15
- "<|extratoken_112|>": 50368,
16
- "<|extratoken_113|>": 50369,
17
- "<|extratoken_114|>": 50370,
18
- "<|extratoken_115|>": 50371,
19
- "<|extratoken_116|>": 50372,
20
- "<|extratoken_117|>": 50373,
21
- "<|extratoken_118|>": 50374,
22
- "<|extratoken_119|>": 50375,
23
- "<|extratoken_11|>": 50267,
24
- "<|extratoken_120|>": 50376,
25
- "<|extratoken_121|>": 50377,
26
- "<|extratoken_122|>": 50378,
27
- "<|extratoken_123|>": 50379,
28
- "<|extratoken_124|>": 50380,
29
- "<|extratoken_125|>": 50381,
30
- "<|extratoken_126|>": 50382,
31
- "<|extratoken_127|>": 50383,
32
- "<|extratoken_128|>": 50384,
33
- "<|extratoken_129|>": 50385,
34
- "<|extratoken_12|>": 50268,
35
- "<|extratoken_130|>": 50386,
36
- "<|extratoken_131|>": 50387,
37
- "<|extratoken_132|>": 50388,
38
- "<|extratoken_133|>": 50389,
39
- "<|extratoken_134|>": 50390,
40
- "<|extratoken_135|>": 50391,
41
- "<|extratoken_136|>": 50392,
42
- "<|extratoken_137|>": 50393,
43
- "<|extratoken_138|>": 50394,
44
- "<|extratoken_139|>": 50395,
45
- "<|extratoken_13|>": 50269,
46
- "<|extratoken_140|>": 50396,
47
- "<|extratoken_141|>": 50397,
48
- "<|extratoken_142|>": 50398,
49
- "<|extratoken_143|>": 50399,
50
- "<|extratoken_14|>": 50270,
51
- "<|extratoken_15|>": 50271,
52
- "<|extratoken_16|>": 50272,
53
- "<|extratoken_17|>": 50273,
54
- "<|extratoken_18|>": 50274,
55
- "<|extratoken_19|>": 50275,
56
- "<|extratoken_1|>": 50257,
57
- "<|extratoken_20|>": 50276,
58
- "<|extratoken_21|>": 50277,
59
- "<|extratoken_22|>": 50278,
60
- "<|extratoken_23|>": 50279,
61
- "<|extratoken_24|>": 50280,
62
- "<|extratoken_25|>": 50281,
63
- "<|extratoken_26|>": 50282,
64
- "<|extratoken_27|>": 50283,
65
- "<|extratoken_28|>": 50284,
66
- "<|extratoken_29|>": 50285,
67
- "<|extratoken_2|>": 50258,
68
- "<|extratoken_30|>": 50286,
69
- "<|extratoken_31|>": 50287,
70
- "<|extratoken_32|>": 50288,
71
- "<|extratoken_33|>": 50289,
72
- "<|extratoken_34|>": 50290,
73
- "<|extratoken_35|>": 50291,
74
- "<|extratoken_36|>": 50292,
75
- "<|extratoken_37|>": 50293,
76
- "<|extratoken_38|>": 50294,
77
- "<|extratoken_39|>": 50295,
78
- "<|extratoken_3|>": 50259,
79
- "<|extratoken_40|>": 50296,
80
- "<|extratoken_41|>": 50297,
81
- "<|extratoken_42|>": 50298,
82
- "<|extratoken_43|>": 50299,
83
- "<|extratoken_44|>": 50300,
84
- "<|extratoken_45|>": 50301,
85
- "<|extratoken_46|>": 50302,
86
- "<|extratoken_47|>": 50303,
87
- "<|extratoken_48|>": 50304,
88
- "<|extratoken_49|>": 50305,
89
- "<|extratoken_4|>": 50260,
90
- "<|extratoken_50|>": 50306,
91
- "<|extratoken_51|>": 50307,
92
- "<|extratoken_52|>": 50308,
93
- "<|extratoken_53|>": 50309,
94
- "<|extratoken_54|>": 50310,
95
- "<|extratoken_55|>": 50311,
96
- "<|extratoken_56|>": 50312,
97
- "<|extratoken_57|>": 50313,
98
- "<|extratoken_58|>": 50314,
99
- "<|extratoken_59|>": 50315,
100
- "<|extratoken_5|>": 50261,
101
- "<|extratoken_60|>": 50316,
102
- "<|extratoken_61|>": 50317,
103
- "<|extratoken_62|>": 50318,
104
- "<|extratoken_63|>": 50319,
105
- "<|extratoken_64|>": 50320,
106
- "<|extratoken_65|>": 50321,
107
- "<|extratoken_66|>": 50322,
108
- "<|extratoken_67|>": 50323,
109
- "<|extratoken_68|>": 50324,
110
- "<|extratoken_69|>": 50325,
111
- "<|extratoken_6|>": 50262,
112
- "<|extratoken_70|>": 50326,
113
- "<|extratoken_71|>": 50327,
114
- "<|extratoken_72|>": 50328,
115
- "<|extratoken_73|>": 50329,
116
- "<|extratoken_74|>": 50330,
117
- "<|extratoken_75|>": 50331,
118
- "<|extratoken_76|>": 50332,
119
- "<|extratoken_77|>": 50333,
120
- "<|extratoken_78|>": 50334,
121
- "<|extratoken_79|>": 50335,
122
- "<|extratoken_7|>": 50263,
123
- "<|extratoken_80|>": 50336,
124
- "<|extratoken_81|>": 50337,
125
- "<|extratoken_82|>": 50338,
126
- "<|extratoken_83|>": 50339,
127
- "<|extratoken_84|>": 50340,
128
- "<|extratoken_85|>": 50341,
129
- "<|extratoken_86|>": 50342,
130
- "<|extratoken_87|>": 50343,
131
- "<|extratoken_88|>": 50344,
132
- "<|extratoken_89|>": 50345,
133
- "<|extratoken_8|>": 50264,
134
- "<|extratoken_90|>": 50346,
135
- "<|extratoken_91|>": 50347,
136
- "<|extratoken_92|>": 50348,
137
- "<|extratoken_93|>": 50349,
138
- "<|extratoken_94|>": 50350,
139
- "<|extratoken_95|>": 50351,
140
- "<|extratoken_96|>": 50352,
141
- "<|extratoken_97|>": 50353,
142
- "<|extratoken_98|>": 50354,
143
- "<|extratoken_99|>": 50355,
144
- "<|extratoken_9|>": 50265
145
  }
 
1
  {
2
+ "<|vulgarg|>": 50257,
3
+ "[/INST]": 50259,
4
+ "[INST]": 50258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "./",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTJForCausalLM"
@@ -45,8 +45,8 @@
45
  },
46
  "tie_word_embeddings": false,
47
  "tokenizer_class": "GPT2Tokenizer",
48
- "torch_dtype": "float16",
49
  "transformers_version": "4.34.0.dev0",
50
  "use_cache": true,
51
- "vocab_size": 50400
52
  }
 
1
  {
2
+ "_name_or_path": "/home/nico/dev/projects/ai/zenos/zenos-gpt-j-6B-alpaca-evol-4bit-tmp",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTJForCausalLM"
 
45
  },
46
  "tie_word_embeddings": false,
47
  "tokenizer_class": "GPT2Tokenizer",
48
+ "torch_dtype": "bfloat16",
49
  "transformers_version": "4.34.0.dev0",
50
  "use_cache": true,
51
+ "vocab_size": 50260
52
  }
special_tokens_map.json CHANGED
@@ -1,23 +1,5 @@
1
  {
2
- "bos_token": {
3
- "content": "<|endoftext|>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "<|endoftext|>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "unk_token": {
17
- "content": "<|endoftext|>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- }
23
  }
 
1
  {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  }
tokenizer.json CHANGED
@@ -9,12 +9,12 @@
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": true,
13
  "special": true
14
  },
15
  {
16
  "id": 50257,
17
- "content": "<|extratoken_1|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "id": 50258,
26
- "content": "<|extratoken_2|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -32,1267 +32,7 @@
32
  },
33
  {
34
  "id": 50259,
35
- "content": "<|extratoken_3|>",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": true,
40
- "special": false
41
- },
42
- {
43
- "id": 50260,
44
- "content": "<|extratoken_4|>",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": true,
49
- "special": false
50
- },
51
- {
52
- "id": 50261,
53
- "content": "<|extratoken_5|>",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": true,
58
- "special": false
59
- },
60
- {
61
- "id": 50262,
62
- "content": "<|extratoken_6|>",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": true,
67
- "special": false
68
- },
69
- {
70
- "id": 50263,
71
- "content": "<|extratoken_7|>",
72
- "single_word": false,
73
- "lstrip": false,
74
- "rstrip": false,
75
- "normalized": true,
76
- "special": false
77
- },
78
- {
79
- "id": 50264,
80
- "content": "<|extratoken_8|>",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": true,
85
- "special": false
86
- },
87
- {
88
- "id": 50265,
89
- "content": "<|extratoken_9|>",
90
- "single_word": false,
91
- "lstrip": false,
92
- "rstrip": false,
93
- "normalized": true,
94
- "special": false
95
- },
96
- {
97
- "id": 50266,
98
- "content": "<|extratoken_10|>",
99
- "single_word": false,
100
- "lstrip": false,
101
- "rstrip": false,
102
- "normalized": true,
103
- "special": false
104
- },
105
- {
106
- "id": 50267,
107
- "content": "<|extratoken_11|>",
108
- "single_word": false,
109
- "lstrip": false,
110
- "rstrip": false,
111
- "normalized": true,
112
- "special": false
113
- },
114
- {
115
- "id": 50268,
116
- "content": "<|extratoken_12|>",
117
- "single_word": false,
118
- "lstrip": false,
119
- "rstrip": false,
120
- "normalized": true,
121
- "special": false
122
- },
123
- {
124
- "id": 50269,
125
- "content": "<|extratoken_13|>",
126
- "single_word": false,
127
- "lstrip": false,
128
- "rstrip": false,
129
- "normalized": true,
130
- "special": false
131
- },
132
- {
133
- "id": 50270,
134
- "content": "<|extratoken_14|>",
135
- "single_word": false,
136
- "lstrip": false,
137
- "rstrip": false,
138
- "normalized": true,
139
- "special": false
140
- },
141
- {
142
- "id": 50271,
143
- "content": "<|extratoken_15|>",
144
- "single_word": false,
145
- "lstrip": false,
146
- "rstrip": false,
147
- "normalized": true,
148
- "special": false
149
- },
150
- {
151
- "id": 50272,
152
- "content": "<|extratoken_16|>",
153
- "single_word": false,
154
- "lstrip": false,
155
- "rstrip": false,
156
- "normalized": true,
157
- "special": false
158
- },
159
- {
160
- "id": 50273,
161
- "content": "<|extratoken_17|>",
162
- "single_word": false,
163
- "lstrip": false,
164
- "rstrip": false,
165
- "normalized": true,
166
- "special": false
167
- },
168
- {
169
- "id": 50274,
170
- "content": "<|extratoken_18|>",
171
- "single_word": false,
172
- "lstrip": false,
173
- "rstrip": false,
174
- "normalized": true,
175
- "special": false
176
- },
177
- {
178
- "id": 50275,
179
- "content": "<|extratoken_19|>",
180
- "single_word": false,
181
- "lstrip": false,
182
- "rstrip": false,
183
- "normalized": true,
184
- "special": false
185
- },
186
- {
187
- "id": 50276,
188
- "content": "<|extratoken_20|>",
189
- "single_word": false,
190
- "lstrip": false,
191
- "rstrip": false,
192
- "normalized": true,
193
- "special": false
194
- },
195
- {
196
- "id": 50277,
197
- "content": "<|extratoken_21|>",
198
- "single_word": false,
199
- "lstrip": false,
200
- "rstrip": false,
201
- "normalized": true,
202
- "special": false
203
- },
204
- {
205
- "id": 50278,
206
- "content": "<|extratoken_22|>",
207
- "single_word": false,
208
- "lstrip": false,
209
- "rstrip": false,
210
- "normalized": true,
211
- "special": false
212
- },
213
- {
214
- "id": 50279,
215
- "content": "<|extratoken_23|>",
216
- "single_word": false,
217
- "lstrip": false,
218
- "rstrip": false,
219
- "normalized": true,
220
- "special": false
221
- },
222
- {
223
- "id": 50280,
224
- "content": "<|extratoken_24|>",
225
- "single_word": false,
226
- "lstrip": false,
227
- "rstrip": false,
228
- "normalized": true,
229
- "special": false
230
- },
231
- {
232
- "id": 50281,
233
- "content": "<|extratoken_25|>",
234
- "single_word": false,
235
- "lstrip": false,
236
- "rstrip": false,
237
- "normalized": true,
238
- "special": false
239
- },
240
- {
241
- "id": 50282,
242
- "content": "<|extratoken_26|>",
243
- "single_word": false,
244
- "lstrip": false,
245
- "rstrip": false,
246
- "normalized": true,
247
- "special": false
248
- },
249
- {
250
- "id": 50283,
251
- "content": "<|extratoken_27|>",
252
- "single_word": false,
253
- "lstrip": false,
254
- "rstrip": false,
255
- "normalized": true,
256
- "special": false
257
- },
258
- {
259
- "id": 50284,
260
- "content": "<|extratoken_28|>",
261
- "single_word": false,
262
- "lstrip": false,
263
- "rstrip": false,
264
- "normalized": true,
265
- "special": false
266
- },
267
- {
268
- "id": 50285,
269
- "content": "<|extratoken_29|>",
270
- "single_word": false,
271
- "lstrip": false,
272
- "rstrip": false,
273
- "normalized": true,
274
- "special": false
275
- },
276
- {
277
- "id": 50286,
278
- "content": "<|extratoken_30|>",
279
- "single_word": false,
280
- "lstrip": false,
281
- "rstrip": false,
282
- "normalized": true,
283
- "special": false
284
- },
285
- {
286
- "id": 50287,
287
- "content": "<|extratoken_31|>",
288
- "single_word": false,
289
- "lstrip": false,
290
- "rstrip": false,
291
- "normalized": true,
292
- "special": false
293
- },
294
- {
295
- "id": 50288,
296
- "content": "<|extratoken_32|>",
297
- "single_word": false,
298
- "lstrip": false,
299
- "rstrip": false,
300
- "normalized": true,
301
- "special": false
302
- },
303
- {
304
- "id": 50289,
305
- "content": "<|extratoken_33|>",
306
- "single_word": false,
307
- "lstrip": false,
308
- "rstrip": false,
309
- "normalized": true,
310
- "special": false
311
- },
312
- {
313
- "id": 50290,
314
- "content": "<|extratoken_34|>",
315
- "single_word": false,
316
- "lstrip": false,
317
- "rstrip": false,
318
- "normalized": true,
319
- "special": false
320
- },
321
- {
322
- "id": 50291,
323
- "content": "<|extratoken_35|>",
324
- "single_word": false,
325
- "lstrip": false,
326
- "rstrip": false,
327
- "normalized": true,
328
- "special": false
329
- },
330
- {
331
- "id": 50292,
332
- "content": "<|extratoken_36|>",
333
- "single_word": false,
334
- "lstrip": false,
335
- "rstrip": false,
336
- "normalized": true,
337
- "special": false
338
- },
339
- {
340
- "id": 50293,
341
- "content": "<|extratoken_37|>",
342
- "single_word": false,
343
- "lstrip": false,
344
- "rstrip": false,
345
- "normalized": true,
346
- "special": false
347
- },
348
- {
349
- "id": 50294,
350
- "content": "<|extratoken_38|>",
351
- "single_word": false,
352
- "lstrip": false,
353
- "rstrip": false,
354
- "normalized": true,
355
- "special": false
356
- },
357
- {
358
- "id": 50295,
359
- "content": "<|extratoken_39|>",
360
- "single_word": false,
361
- "lstrip": false,
362
- "rstrip": false,
363
- "normalized": true,
364
- "special": false
365
- },
366
- {
367
- "id": 50296,
368
- "content": "<|extratoken_40|>",
369
- "single_word": false,
370
- "lstrip": false,
371
- "rstrip": false,
372
- "normalized": true,
373
- "special": false
374
- },
375
- {
376
- "id": 50297,
377
- "content": "<|extratoken_41|>",
378
- "single_word": false,
379
- "lstrip": false,
380
- "rstrip": false,
381
- "normalized": true,
382
- "special": false
383
- },
384
- {
385
- "id": 50298,
386
- "content": "<|extratoken_42|>",
387
- "single_word": false,
388
- "lstrip": false,
389
- "rstrip": false,
390
- "normalized": true,
391
- "special": false
392
- },
393
- {
394
- "id": 50299,
395
- "content": "<|extratoken_43|>",
396
- "single_word": false,
397
- "lstrip": false,
398
- "rstrip": false,
399
- "normalized": true,
400
- "special": false
401
- },
402
- {
403
- "id": 50300,
404
- "content": "<|extratoken_44|>",
405
- "single_word": false,
406
- "lstrip": false,
407
- "rstrip": false,
408
- "normalized": true,
409
- "special": false
410
- },
411
- {
412
- "id": 50301,
413
- "content": "<|extratoken_45|>",
414
- "single_word": false,
415
- "lstrip": false,
416
- "rstrip": false,
417
- "normalized": true,
418
- "special": false
419
- },
420
- {
421
- "id": 50302,
422
- "content": "<|extratoken_46|>",
423
- "single_word": false,
424
- "lstrip": false,
425
- "rstrip": false,
426
- "normalized": true,
427
- "special": false
428
- },
429
- {
430
- "id": 50303,
431
- "content": "<|extratoken_47|>",
432
- "single_word": false,
433
- "lstrip": false,
434
- "rstrip": false,
435
- "normalized": true,
436
- "special": false
437
- },
438
- {
439
- "id": 50304,
440
- "content": "<|extratoken_48|>",
441
- "single_word": false,
442
- "lstrip": false,
443
- "rstrip": false,
444
- "normalized": true,
445
- "special": false
446
- },
447
- {
448
- "id": 50305,
449
- "content": "<|extratoken_49|>",
450
- "single_word": false,
451
- "lstrip": false,
452
- "rstrip": false,
453
- "normalized": true,
454
- "special": false
455
- },
456
- {
457
- "id": 50306,
458
- "content": "<|extratoken_50|>",
459
- "single_word": false,
460
- "lstrip": false,
461
- "rstrip": false,
462
- "normalized": true,
463
- "special": false
464
- },
465
- {
466
- "id": 50307,
467
- "content": "<|extratoken_51|>",
468
- "single_word": false,
469
- "lstrip": false,
470
- "rstrip": false,
471
- "normalized": true,
472
- "special": false
473
- },
474
- {
475
- "id": 50308,
476
- "content": "<|extratoken_52|>",
477
- "single_word": false,
478
- "lstrip": false,
479
- "rstrip": false,
480
- "normalized": true,
481
- "special": false
482
- },
483
- {
484
- "id": 50309,
485
- "content": "<|extratoken_53|>",
486
- "single_word": false,
487
- "lstrip": false,
488
- "rstrip": false,
489
- "normalized": true,
490
- "special": false
491
- },
492
- {
493
- "id": 50310,
494
- "content": "<|extratoken_54|>",
495
- "single_word": false,
496
- "lstrip": false,
497
- "rstrip": false,
498
- "normalized": true,
499
- "special": false
500
- },
501
- {
502
- "id": 50311,
503
- "content": "<|extratoken_55|>",
504
- "single_word": false,
505
- "lstrip": false,
506
- "rstrip": false,
507
- "normalized": true,
508
- "special": false
509
- },
510
- {
511
- "id": 50312,
512
- "content": "<|extratoken_56|>",
513
- "single_word": false,
514
- "lstrip": false,
515
- "rstrip": false,
516
- "normalized": true,
517
- "special": false
518
- },
519
- {
520
- "id": 50313,
521
- "content": "<|extratoken_57|>",
522
- "single_word": false,
523
- "lstrip": false,
524
- "rstrip": false,
525
- "normalized": true,
526
- "special": false
527
- },
528
- {
529
- "id": 50314,
530
- "content": "<|extratoken_58|>",
531
- "single_word": false,
532
- "lstrip": false,
533
- "rstrip": false,
534
- "normalized": true,
535
- "special": false
536
- },
537
- {
538
- "id": 50315,
539
- "content": "<|extratoken_59|>",
540
- "single_word": false,
541
- "lstrip": false,
542
- "rstrip": false,
543
- "normalized": true,
544
- "special": false
545
- },
546
- {
547
- "id": 50316,
548
- "content": "<|extratoken_60|>",
549
- "single_word": false,
550
- "lstrip": false,
551
- "rstrip": false,
552
- "normalized": true,
553
- "special": false
554
- },
555
- {
556
- "id": 50317,
557
- "content": "<|extratoken_61|>",
558
- "single_word": false,
559
- "lstrip": false,
560
- "rstrip": false,
561
- "normalized": true,
562
- "special": false
563
- },
564
- {
565
- "id": 50318,
566
- "content": "<|extratoken_62|>",
567
- "single_word": false,
568
- "lstrip": false,
569
- "rstrip": false,
570
- "normalized": true,
571
- "special": false
572
- },
573
- {
574
- "id": 50319,
575
- "content": "<|extratoken_63|>",
576
- "single_word": false,
577
- "lstrip": false,
578
- "rstrip": false,
579
- "normalized": true,
580
- "special": false
581
- },
582
- {
583
- "id": 50320,
584
- "content": "<|extratoken_64|>",
585
- "single_word": false,
586
- "lstrip": false,
587
- "rstrip": false,
588
- "normalized": true,
589
- "special": false
590
- },
591
- {
592
- "id": 50321,
593
- "content": "<|extratoken_65|>",
594
- "single_word": false,
595
- "lstrip": false,
596
- "rstrip": false,
597
- "normalized": true,
598
- "special": false
599
- },
600
- {
601
- "id": 50322,
602
- "content": "<|extratoken_66|>",
603
- "single_word": false,
604
- "lstrip": false,
605
- "rstrip": false,
606
- "normalized": true,
607
- "special": false
608
- },
609
- {
610
- "id": 50323,
611
- "content": "<|extratoken_67|>",
612
- "single_word": false,
613
- "lstrip": false,
614
- "rstrip": false,
615
- "normalized": true,
616
- "special": false
617
- },
618
- {
619
- "id": 50324,
620
- "content": "<|extratoken_68|>",
621
- "single_word": false,
622
- "lstrip": false,
623
- "rstrip": false,
624
- "normalized": true,
625
- "special": false
626
- },
627
- {
628
- "id": 50325,
629
- "content": "<|extratoken_69|>",
630
- "single_word": false,
631
- "lstrip": false,
632
- "rstrip": false,
633
- "normalized": true,
634
- "special": false
635
- },
636
- {
637
- "id": 50326,
638
- "content": "<|extratoken_70|>",
639
- "single_word": false,
640
- "lstrip": false,
641
- "rstrip": false,
642
- "normalized": true,
643
- "special": false
644
- },
645
- {
646
- "id": 50327,
647
- "content": "<|extratoken_71|>",
648
- "single_word": false,
649
- "lstrip": false,
650
- "rstrip": false,
651
- "normalized": true,
652
- "special": false
653
- },
654
- {
655
- "id": 50328,
656
- "content": "<|extratoken_72|>",
657
- "single_word": false,
658
- "lstrip": false,
659
- "rstrip": false,
660
- "normalized": true,
661
- "special": false
662
- },
663
- {
664
- "id": 50329,
665
- "content": "<|extratoken_73|>",
666
- "single_word": false,
667
- "lstrip": false,
668
- "rstrip": false,
669
- "normalized": true,
670
- "special": false
671
- },
672
- {
673
- "id": 50330,
674
- "content": "<|extratoken_74|>",
675
- "single_word": false,
676
- "lstrip": false,
677
- "rstrip": false,
678
- "normalized": true,
679
- "special": false
680
- },
681
- {
682
- "id": 50331,
683
- "content": "<|extratoken_75|>",
684
- "single_word": false,
685
- "lstrip": false,
686
- "rstrip": false,
687
- "normalized": true,
688
- "special": false
689
- },
690
- {
691
- "id": 50332,
692
- "content": "<|extratoken_76|>",
693
- "single_word": false,
694
- "lstrip": false,
695
- "rstrip": false,
696
- "normalized": true,
697
- "special": false
698
- },
699
- {
700
- "id": 50333,
701
- "content": "<|extratoken_77|>",
702
- "single_word": false,
703
- "lstrip": false,
704
- "rstrip": false,
705
- "normalized": true,
706
- "special": false
707
- },
708
- {
709
- "id": 50334,
710
- "content": "<|extratoken_78|>",
711
- "single_word": false,
712
- "lstrip": false,
713
- "rstrip": false,
714
- "normalized": true,
715
- "special": false
716
- },
717
- {
718
- "id": 50335,
719
- "content": "<|extratoken_79|>",
720
- "single_word": false,
721
- "lstrip": false,
722
- "rstrip": false,
723
- "normalized": true,
724
- "special": false
725
- },
726
- {
727
- "id": 50336,
728
- "content": "<|extratoken_80|>",
729
- "single_word": false,
730
- "lstrip": false,
731
- "rstrip": false,
732
- "normalized": true,
733
- "special": false
734
- },
735
- {
736
- "id": 50337,
737
- "content": "<|extratoken_81|>",
738
- "single_word": false,
739
- "lstrip": false,
740
- "rstrip": false,
741
- "normalized": true,
742
- "special": false
743
- },
744
- {
745
- "id": 50338,
746
- "content": "<|extratoken_82|>",
747
- "single_word": false,
748
- "lstrip": false,
749
- "rstrip": false,
750
- "normalized": true,
751
- "special": false
752
- },
753
- {
754
- "id": 50339,
755
- "content": "<|extratoken_83|>",
756
- "single_word": false,
757
- "lstrip": false,
758
- "rstrip": false,
759
- "normalized": true,
760
- "special": false
761
- },
762
- {
763
- "id": 50340,
764
- "content": "<|extratoken_84|>",
765
- "single_word": false,
766
- "lstrip": false,
767
- "rstrip": false,
768
- "normalized": true,
769
- "special": false
770
- },
771
- {
772
- "id": 50341,
773
- "content": "<|extratoken_85|>",
774
- "single_word": false,
775
- "lstrip": false,
776
- "rstrip": false,
777
- "normalized": true,
778
- "special": false
779
- },
780
- {
781
- "id": 50342,
782
- "content": "<|extratoken_86|>",
783
- "single_word": false,
784
- "lstrip": false,
785
- "rstrip": false,
786
- "normalized": true,
787
- "special": false
788
- },
789
- {
790
- "id": 50343,
791
- "content": "<|extratoken_87|>",
792
- "single_word": false,
793
- "lstrip": false,
794
- "rstrip": false,
795
- "normalized": true,
796
- "special": false
797
- },
798
- {
799
- "id": 50344,
800
- "content": "<|extratoken_88|>",
801
- "single_word": false,
802
- "lstrip": false,
803
- "rstrip": false,
804
- "normalized": true,
805
- "special": false
806
- },
807
- {
808
- "id": 50345,
809
- "content": "<|extratoken_89|>",
810
- "single_word": false,
811
- "lstrip": false,
812
- "rstrip": false,
813
- "normalized": true,
814
- "special": false
815
- },
816
- {
817
- "id": 50346,
818
- "content": "<|extratoken_90|>",
819
- "single_word": false,
820
- "lstrip": false,
821
- "rstrip": false,
822
- "normalized": true,
823
- "special": false
824
- },
825
- {
826
- "id": 50347,
827
- "content": "<|extratoken_91|>",
828
- "single_word": false,
829
- "lstrip": false,
830
- "rstrip": false,
831
- "normalized": true,
832
- "special": false
833
- },
834
- {
835
- "id": 50348,
836
- "content": "<|extratoken_92|>",
837
- "single_word": false,
838
- "lstrip": false,
839
- "rstrip": false,
840
- "normalized": true,
841
- "special": false
842
- },
843
- {
844
- "id": 50349,
845
- "content": "<|extratoken_93|>",
846
- "single_word": false,
847
- "lstrip": false,
848
- "rstrip": false,
849
- "normalized": true,
850
- "special": false
851
- },
852
- {
853
- "id": 50350,
854
- "content": "<|extratoken_94|>",
855
- "single_word": false,
856
- "lstrip": false,
857
- "rstrip": false,
858
- "normalized": true,
859
- "special": false
860
- },
861
- {
862
- "id": 50351,
863
- "content": "<|extratoken_95|>",
864
- "single_word": false,
865
- "lstrip": false,
866
- "rstrip": false,
867
- "normalized": true,
868
- "special": false
869
- },
870
- {
871
- "id": 50352,
872
- "content": "<|extratoken_96|>",
873
- "single_word": false,
874
- "lstrip": false,
875
- "rstrip": false,
876
- "normalized": true,
877
- "special": false
878
- },
879
- {
880
- "id": 50353,
881
- "content": "<|extratoken_97|>",
882
- "single_word": false,
883
- "lstrip": false,
884
- "rstrip": false,
885
- "normalized": true,
886
- "special": false
887
- },
888
- {
889
- "id": 50354,
890
- "content": "<|extratoken_98|>",
891
- "single_word": false,
892
- "lstrip": false,
893
- "rstrip": false,
894
- "normalized": true,
895
- "special": false
896
- },
897
- {
898
- "id": 50355,
899
- "content": "<|extratoken_99|>",
900
- "single_word": false,
901
- "lstrip": false,
902
- "rstrip": false,
903
- "normalized": true,
904
- "special": false
905
- },
906
- {
907
- "id": 50356,
908
- "content": "<|extratoken_100|>",
909
- "single_word": false,
910
- "lstrip": false,
911
- "rstrip": false,
912
- "normalized": true,
913
- "special": false
914
- },
915
- {
916
- "id": 50357,
917
- "content": "<|extratoken_101|>",
918
- "single_word": false,
919
- "lstrip": false,
920
- "rstrip": false,
921
- "normalized": true,
922
- "special": false
923
- },
924
- {
925
- "id": 50358,
926
- "content": "<|extratoken_102|>",
927
- "single_word": false,
928
- "lstrip": false,
929
- "rstrip": false,
930
- "normalized": true,
931
- "special": false
932
- },
933
- {
934
- "id": 50359,
935
- "content": "<|extratoken_103|>",
936
- "single_word": false,
937
- "lstrip": false,
938
- "rstrip": false,
939
- "normalized": true,
940
- "special": false
941
- },
942
- {
943
- "id": 50360,
944
- "content": "<|extratoken_104|>",
945
- "single_word": false,
946
- "lstrip": false,
947
- "rstrip": false,
948
- "normalized": true,
949
- "special": false
950
- },
951
- {
952
- "id": 50361,
953
- "content": "<|extratoken_105|>",
954
- "single_word": false,
955
- "lstrip": false,
956
- "rstrip": false,
957
- "normalized": true,
958
- "special": false
959
- },
960
- {
961
- "id": 50362,
962
- "content": "<|extratoken_106|>",
963
- "single_word": false,
964
- "lstrip": false,
965
- "rstrip": false,
966
- "normalized": true,
967
- "special": false
968
- },
969
- {
970
- "id": 50363,
971
- "content": "<|extratoken_107|>",
972
- "single_word": false,
973
- "lstrip": false,
974
- "rstrip": false,
975
- "normalized": true,
976
- "special": false
977
- },
978
- {
979
- "id": 50364,
980
- "content": "<|extratoken_108|>",
981
- "single_word": false,
982
- "lstrip": false,
983
- "rstrip": false,
984
- "normalized": true,
985
- "special": false
986
- },
987
- {
988
- "id": 50365,
989
- "content": "<|extratoken_109|>",
990
- "single_word": false,
991
- "lstrip": false,
992
- "rstrip": false,
993
- "normalized": true,
994
- "special": false
995
- },
996
- {
997
- "id": 50366,
998
- "content": "<|extratoken_110|>",
999
- "single_word": false,
1000
- "lstrip": false,
1001
- "rstrip": false,
1002
- "normalized": true,
1003
- "special": false
1004
- },
1005
- {
1006
- "id": 50367,
1007
- "content": "<|extratoken_111|>",
1008
- "single_word": false,
1009
- "lstrip": false,
1010
- "rstrip": false,
1011
- "normalized": true,
1012
- "special": false
1013
- },
1014
- {
1015
- "id": 50368,
1016
- "content": "<|extratoken_112|>",
1017
- "single_word": false,
1018
- "lstrip": false,
1019
- "rstrip": false,
1020
- "normalized": true,
1021
- "special": false
1022
- },
1023
- {
1024
- "id": 50369,
1025
- "content": "<|extratoken_113|>",
1026
- "single_word": false,
1027
- "lstrip": false,
1028
- "rstrip": false,
1029
- "normalized": true,
1030
- "special": false
1031
- },
1032
- {
1033
- "id": 50370,
1034
- "content": "<|extratoken_114|>",
1035
- "single_word": false,
1036
- "lstrip": false,
1037
- "rstrip": false,
1038
- "normalized": true,
1039
- "special": false
1040
- },
1041
- {
1042
- "id": 50371,
1043
- "content": "<|extratoken_115|>",
1044
- "single_word": false,
1045
- "lstrip": false,
1046
- "rstrip": false,
1047
- "normalized": true,
1048
- "special": false
1049
- },
1050
- {
1051
- "id": 50372,
1052
- "content": "<|extratoken_116|>",
1053
- "single_word": false,
1054
- "lstrip": false,
1055
- "rstrip": false,
1056
- "normalized": true,
1057
- "special": false
1058
- },
1059
- {
1060
- "id": 50373,
1061
- "content": "<|extratoken_117|>",
1062
- "single_word": false,
1063
- "lstrip": false,
1064
- "rstrip": false,
1065
- "normalized": true,
1066
- "special": false
1067
- },
1068
- {
1069
- "id": 50374,
1070
- "content": "<|extratoken_118|>",
1071
- "single_word": false,
1072
- "lstrip": false,
1073
- "rstrip": false,
1074
- "normalized": true,
1075
- "special": false
1076
- },
1077
- {
1078
- "id": 50375,
1079
- "content": "<|extratoken_119|>",
1080
- "single_word": false,
1081
- "lstrip": false,
1082
- "rstrip": false,
1083
- "normalized": true,
1084
- "special": false
1085
- },
1086
- {
1087
- "id": 50376,
1088
- "content": "<|extratoken_120|>",
1089
- "single_word": false,
1090
- "lstrip": false,
1091
- "rstrip": false,
1092
- "normalized": true,
1093
- "special": false
1094
- },
1095
- {
1096
- "id": 50377,
1097
- "content": "<|extratoken_121|>",
1098
- "single_word": false,
1099
- "lstrip": false,
1100
- "rstrip": false,
1101
- "normalized": true,
1102
- "special": false
1103
- },
1104
- {
1105
- "id": 50378,
1106
- "content": "<|extratoken_122|>",
1107
- "single_word": false,
1108
- "lstrip": false,
1109
- "rstrip": false,
1110
- "normalized": true,
1111
- "special": false
1112
- },
1113
- {
1114
- "id": 50379,
1115
- "content": "<|extratoken_123|>",
1116
- "single_word": false,
1117
- "lstrip": false,
1118
- "rstrip": false,
1119
- "normalized": true,
1120
- "special": false
1121
- },
1122
- {
1123
- "id": 50380,
1124
- "content": "<|extratoken_124|>",
1125
- "single_word": false,
1126
- "lstrip": false,
1127
- "rstrip": false,
1128
- "normalized": true,
1129
- "special": false
1130
- },
1131
- {
1132
- "id": 50381,
1133
- "content": "<|extratoken_125|>",
1134
- "single_word": false,
1135
- "lstrip": false,
1136
- "rstrip": false,
1137
- "normalized": true,
1138
- "special": false
1139
- },
1140
- {
1141
- "id": 50382,
1142
- "content": "<|extratoken_126|>",
1143
- "single_word": false,
1144
- "lstrip": false,
1145
- "rstrip": false,
1146
- "normalized": true,
1147
- "special": false
1148
- },
1149
- {
1150
- "id": 50383,
1151
- "content": "<|extratoken_127|>",
1152
- "single_word": false,
1153
- "lstrip": false,
1154
- "rstrip": false,
1155
- "normalized": true,
1156
- "special": false
1157
- },
1158
- {
1159
- "id": 50384,
1160
- "content": "<|extratoken_128|>",
1161
- "single_word": false,
1162
- "lstrip": false,
1163
- "rstrip": false,
1164
- "normalized": true,
1165
- "special": false
1166
- },
1167
- {
1168
- "id": 50385,
1169
- "content": "<|extratoken_129|>",
1170
- "single_word": false,
1171
- "lstrip": false,
1172
- "rstrip": false,
1173
- "normalized": true,
1174
- "special": false
1175
- },
1176
- {
1177
- "id": 50386,
1178
- "content": "<|extratoken_130|>",
1179
- "single_word": false,
1180
- "lstrip": false,
1181
- "rstrip": false,
1182
- "normalized": true,
1183
- "special": false
1184
- },
1185
- {
1186
- "id": 50387,
1187
- "content": "<|extratoken_131|>",
1188
- "single_word": false,
1189
- "lstrip": false,
1190
- "rstrip": false,
1191
- "normalized": true,
1192
- "special": false
1193
- },
1194
- {
1195
- "id": 50388,
1196
- "content": "<|extratoken_132|>",
1197
- "single_word": false,
1198
- "lstrip": false,
1199
- "rstrip": false,
1200
- "normalized": true,
1201
- "special": false
1202
- },
1203
- {
1204
- "id": 50389,
1205
- "content": "<|extratoken_133|>",
1206
- "single_word": false,
1207
- "lstrip": false,
1208
- "rstrip": false,
1209
- "normalized": true,
1210
- "special": false
1211
- },
1212
- {
1213
- "id": 50390,
1214
- "content": "<|extratoken_134|>",
1215
- "single_word": false,
1216
- "lstrip": false,
1217
- "rstrip": false,
1218
- "normalized": true,
1219
- "special": false
1220
- },
1221
- {
1222
- "id": 50391,
1223
- "content": "<|extratoken_135|>",
1224
- "single_word": false,
1225
- "lstrip": false,
1226
- "rstrip": false,
1227
- "normalized": true,
1228
- "special": false
1229
- },
1230
- {
1231
- "id": 50392,
1232
- "content": "<|extratoken_136|>",
1233
- "single_word": false,
1234
- "lstrip": false,
1235
- "rstrip": false,
1236
- "normalized": true,
1237
- "special": false
1238
- },
1239
- {
1240
- "id": 50393,
1241
- "content": "<|extratoken_137|>",
1242
- "single_word": false,
1243
- "lstrip": false,
1244
- "rstrip": false,
1245
- "normalized": true,
1246
- "special": false
1247
- },
1248
- {
1249
- "id": 50394,
1250
- "content": "<|extratoken_138|>",
1251
- "single_word": false,
1252
- "lstrip": false,
1253
- "rstrip": false,
1254
- "normalized": true,
1255
- "special": false
1256
- },
1257
- {
1258
- "id": 50395,
1259
- "content": "<|extratoken_139|>",
1260
- "single_word": false,
1261
- "lstrip": false,
1262
- "rstrip": false,
1263
- "normalized": true,
1264
- "special": false
1265
- },
1266
- {
1267
- "id": 50396,
1268
- "content": "<|extratoken_140|>",
1269
- "single_word": false,
1270
- "lstrip": false,
1271
- "rstrip": false,
1272
- "normalized": true,
1273
- "special": false
1274
- },
1275
- {
1276
- "id": 50397,
1277
- "content": "<|extratoken_141|>",
1278
- "single_word": false,
1279
- "lstrip": false,
1280
- "rstrip": false,
1281
- "normalized": true,
1282
- "special": false
1283
- },
1284
- {
1285
- "id": 50398,
1286
- "content": "<|extratoken_142|>",
1287
- "single_word": false,
1288
- "lstrip": false,
1289
- "rstrip": false,
1290
- "normalized": true,
1291
- "special": false
1292
- },
1293
- {
1294
- "id": 50399,
1295
- "content": "<|extratoken_143|>",
1296
  "single_word": false,
1297
  "lstrip": false,
1298
  "rstrip": false,
 
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": false,
13
  "special": true
14
  },
15
  {
16
  "id": 50257,
17
+ "content": "<|vulgarg|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
23
  },
24
  {
25
  "id": 50258,
26
+ "content": "[INST]",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
32
  },
33
  {
34
  "id": 50259,
35
+ "content": "[/INST]",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -1,35 +1,44 @@
1
  {
2
- "add_bos_token": false,
3
  "add_prefix_space": false,
4
- "bos_token": {
5
- "__type": "AddedToken",
6
- "content": "<|endoftext|>",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
- "single_word": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  },
 
 
12
  "clean_up_tokenization_spaces": true,
13
- "eos_token": {
14
- "__type": "AddedToken",
15
- "content": "<|endoftext|>",
16
- "lstrip": false,
17
- "normalized": true,
18
- "rstrip": false,
19
- "single_word": false
20
- },
21
- "errors": "replace",
22
- "max_length": 2048,
23
  "model_max_length": 2048,
24
- "pad_token": null,
25
- "padding": true,
26
  "tokenizer_class": "GPT2Tokenizer",
27
- "unk_token": {
28
- "__type": "AddedToken",
29
- "content": "<|endoftext|>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false
34
- }
35
  }
 
1
  {
 
2
  "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "50257": {
13
+ "content": "<|vulgarg|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": false
19
+ },
20
+ "50258": {
21
+ "content": "[INST]",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": false
27
+ },
28
+ "50259": {
29
+ "content": "[/INST]",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
+ }
36
  },
37
+ "additional_special_tokens": [],
38
+ "bos_token": "<|endoftext|>",
39
  "clean_up_tokenization_spaces": true,
40
+ "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
41
  "model_max_length": 2048,
 
 
42
  "tokenizer_class": "GPT2Tokenizer",
43
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
44
  }