byroneverson committed on
Commit
9e7e902
1 Parent(s): f509c4b

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +113 -6
tokenizer_config.json CHANGED
@@ -1,4 +1,10 @@
1
  {
 
 
 
 
 
 
2
  "added_tokens_decoder": {
3
  "151329": {
4
  "content": "<|endoftext|>",
@@ -7,14 +13,115 @@
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  }
11
  },
12
- "auto_map": {
13
- "AutoTokenizer": [
14
- "tokenization_chatglm.ChatGLM4Tokenizer",
15
- null
16
- ]
17
- },
18
  "chat_template": "{% for message in messages %}{% if loop.first %}[gMASK]<sop><|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
19
  "clean_up_tokenization_spaces": false,
20
  "do_lower_case": false,
 
1
  {
2
+ "auto_map": {
3
+ "AutoTokenizer": [
4
+ "tokenization_chatglm.ChatGLM4Tokenizer",
5
+ null
6
+ ]
7
+ },
8
  "added_tokens_decoder": {
9
  "151329": {
10
  "content": "<|endoftext|>",
 
13
  "rstrip": false,
14
  "single_word": false,
15
  "special": true
16
+ },
17
+ "151330": {
18
+ "content": "[MASK]",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false,
23
+ "special": true
24
+ },
25
+ "151331": {
26
+ "content": "[gMASK]",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false,
31
+ "special": true
32
+ },
33
+ "151332": {
34
+ "content": "[sMASK]",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false,
39
+ "special": true
40
+ },
41
+ "151333": {
42
+ "content": "<sop>",
43
+ "lstrip": false,
44
+ "normalized": false,
45
+ "rstrip": false,
46
+ "single_word": false,
47
+ "special": true
48
+ },
49
+ "151334": {
50
+ "content": "<eop>",
51
+ "lstrip": false,
52
+ "normalized": false,
53
+ "rstrip": false,
54
+ "single_word": false,
55
+ "special": true
56
+ },
57
+ "151335": {
58
+ "content": "<|system|>",
59
+ "lstrip": false,
60
+ "normalized": false,
61
+ "rstrip": false,
62
+ "single_word": false,
63
+ "special": true
64
+ },
65
+ "151336": {
66
+ "content": "<|user|>",
67
+ "lstrip": false,
68
+ "normalized": false,
69
+ "rstrip": false,
70
+ "single_word": false,
71
+ "special": true
72
+ },
73
+ "151337": {
74
+ "content": "<|assistant|>",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false,
79
+ "special": true
80
+ },
81
+ "151338": {
82
+ "content": "<|observation|>",
83
+ "lstrip": false,
84
+ "normalized": false,
85
+ "rstrip": false,
86
+ "single_word": false,
87
+ "special": true
88
+ },
89
+ "151339": {
90
+ "content": "<|begin_of_image|>",
91
+ "lstrip": false,
92
+ "normalized": false,
93
+ "rstrip": false,
94
+ "single_word": false,
95
+ "special": true
96
+ },
97
+ "151340": {
98
+ "content": "<|end_of_image|>",
99
+ "lstrip": false,
100
+ "normalized": false,
101
+ "rstrip": false,
102
+ "single_word": false,
103
+ "special": true
104
+ },
105
+ "151341": {
106
+ "content": "<|begin_of_video|>",
107
+ "lstrip": false,
108
+ "normalized": false,
109
+ "rstrip": false,
110
+ "single_word": false,
111
+ "special": true
112
+ },
113
+ "151342": {
114
+ "content": "<|end_of_video|>",
115
+ "lstrip": false,
116
+ "normalized": false,
117
+ "rstrip": false,
118
+ "single_word": false,
119
+ "special": true
120
  }
121
  },
122
+ "additional_special_tokens": ["<|endoftext|>", "[MASK]", "[gMASK]", "[sMASK]", "<sop>", "<eop>", "<|system|>",
123
+ "<|user|>", "<|assistant|>", "<|observation|>", "<|begin_of_image|>", "<|end_of_image|>",
124
+ "<|begin_of_video|>", "<|end_of_video|>"],
 
 
 
125
  "chat_template": "{% for message in messages %}{% if loop.first %}[gMASK]<sop><|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
126
  "clean_up_tokenization_spaces": false,
127
  "do_lower_case": false,