shuyuej commited on
Commit
8d711ec
1 Parent(s): e09828b

Upload folder using huggingface_hub

Browse files
cl100k_base.tiktoken ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/Phi-3-small-128k-instruct",
3
+ "architectures": [
4
+ "Phi3SmallForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout_prob": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "microsoft/Phi-3-small-128k-instruct--configuration_phi3_small.Phi3SmallConfig",
10
+ "AutoModelForCausalLM": "microsoft/Phi-3-small-128k-instruct--modeling_phi3_small.Phi3SmallForCausalLM",
11
+ "AutoTokenizer": "microsoft/Phi-3-small-128k-instruct--tokenization_phi3_small.Phi3SmallTokenizer"
12
+ },
13
+ "blocksparse_block_size": 64,
14
+ "blocksparse_homo_head_pattern": false,
15
+ "blocksparse_num_local_blocks": 16,
16
+ "blocksparse_triton_kernel_block_size": 64,
17
+ "blocksparse_vert_stride": 8,
18
+ "bos_token_id": 100257,
19
+ "dense_attention_every_n_layers": 2,
20
+ "dummy_token_indices": [
21
+ 100256,
22
+ 100258,
23
+ 100259,
24
+ 100260,
25
+ 100264,
26
+ 100265,
27
+ 100267,
28
+ 100268,
29
+ 100269,
30
+ 100270,
31
+ 100271,
32
+ 100272,
33
+ 100273,
34
+ 100274,
35
+ 100275,
36
+ 100276,
37
+ 100277,
38
+ 100278,
39
+ 100279,
40
+ 100280,
41
+ 100281,
42
+ 100282,
43
+ 100283,
44
+ 100284,
45
+ 100285,
46
+ 100286,
47
+ 100287,
48
+ 100288,
49
+ 100289,
50
+ 100290,
51
+ 100291,
52
+ 100292,
53
+ 100293,
54
+ 100294,
55
+ 100295,
56
+ 100296,
57
+ 100297,
58
+ 100298,
59
+ 100299,
60
+ 100300,
61
+ 100301,
62
+ 100302,
63
+ 100303,
64
+ 100304,
65
+ 100305,
66
+ 100306,
67
+ 100307,
68
+ 100308,
69
+ 100309,
70
+ 100310,
71
+ 100311,
72
+ 100312,
73
+ 100313,
74
+ 100314,
75
+ 100315,
76
+ 100316,
77
+ 100317,
78
+ 100318,
79
+ 100319,
80
+ 100320,
81
+ 100321,
82
+ 100322,
83
+ 100323,
84
+ 100324,
85
+ 100325,
86
+ 100326,
87
+ 100327,
88
+ 100328,
89
+ 100329,
90
+ 100330,
91
+ 100331,
92
+ 100332,
93
+ 100333,
94
+ 100334,
95
+ 100335,
96
+ 100336,
97
+ 100337,
98
+ 100338,
99
+ 100339,
100
+ 100340,
101
+ 100341,
102
+ 100342,
103
+ 100343,
104
+ 100344,
105
+ 100345,
106
+ 100346,
107
+ 100347,
108
+ 100348,
109
+ 100349,
110
+ 100350,
111
+ 100351
112
+ ],
113
+ "embedding_dropout_prob": 0.1,
114
+ "eos_token_id": 100257,
115
+ "ff_dim_multiplier": null,
116
+ "ff_intermediate_size": 14336,
117
+ "ffn_dropout_prob": 0.1,
118
+ "gegelu_limit": 20.0,
119
+ "gegelu_pad_to_256": true,
120
+ "hidden_act": "gegelu",
121
+ "hidden_size": 4096,
122
+ "initializer_range": 0.02,
123
+ "layer_norm_epsilon": 1e-05,
124
+ "max_position_embeddings": 131072,
125
+ "model_type": "phi3small",
126
+ "mup_attn_multiplier": 1.0,
127
+ "mup_embedding_multiplier": 10.0,
128
+ "mup_use_scaling": true,
129
+ "mup_width_multiplier": 8.0,
130
+ "num_attention_heads": 32,
131
+ "num_hidden_layers": 32,
132
+ "num_key_value_heads": 8,
133
+ "original_max_position_embeddings": 8192,
134
+ "pad_sequence_to_multiple_of_64": true,
135
+ "quantization_config": {
136
+ "batch_size": 1,
137
+ "bits": 4,
138
+ "block_name_to_quantize": null,
139
+ "cache_block_outputs": true,
140
+ "damp_percent": 0.1,
141
+ "dataset": null,
142
+ "desc_act": false,
143
+ "exllama_config": {
144
+ "version": 1
145
+ },
146
+ "group_size": 128,
147
+ "max_input_length": null,
148
+ "model_seqlen": null,
149
+ "module_name_preceding_first_block": null,
150
+ "modules_in_block_to_quantize": null,
151
+ "pad_token_id": null,
152
+ "quant_method": "gptq",
153
+ "sym": true,
154
+ "tokenizer": null,
155
+ "true_sequential": true,
156
+ "use_cuda_fp16": false,
157
+ "use_exllama": true
158
+ },
159
+ "reorder_and_upcast_attn": false,
160
+ "rope_embedding_base": 1000000,
161
+ "rope_position_scale": 1.0,
162
+ "rope_scaling": {
163
+ "long_factor": [
164
+ 1.0,
165
+ 1.01,
166
+ 1.01,
167
+ 1.02,
168
+ 1.04,
169
+ 1.04,
170
+ 1.04,
171
+ 1.05,
172
+ 1.05,
173
+ 1.06,
174
+ 1.07,
175
+ 1.08,
176
+ 1.08,
177
+ 1.08,
178
+ 1.08,
179
+ 1.08,
180
+ 1.08,
181
+ 1.08,
182
+ 1.09,
183
+ 1.09,
184
+ 1.2,
185
+ 2.31,
186
+ 3.76,
187
+ 9.38,
188
+ 10.1,
189
+ 10.8,
190
+ 18.1,
191
+ 25.2,
192
+ 25.3,
193
+ 26.1,
194
+ 26.6,
195
+ 30.2,
196
+ 33.0,
197
+ 41.5,
198
+ 44.4,
199
+ 44.8,
200
+ 50.2,
201
+ 51.9,
202
+ 59.3,
203
+ 62.7,
204
+ 66.1,
205
+ 66.3,
206
+ 85.8,
207
+ 89.3,
208
+ 90.0,
209
+ 99.9,
210
+ 107.0,
211
+ 110.0,
212
+ 111.0,
213
+ 117.0,
214
+ 118.0,
215
+ 121.0,
216
+ 122.0,
217
+ 127.0,
218
+ 127.0,
219
+ 128.0,
220
+ 128.0,
221
+ 128.0,
222
+ 128.0,
223
+ 128.0,
224
+ 128.0,
225
+ 129.0,
226
+ 129.0,
227
+ 129.0
228
+ ],
229
+ "long_mscale": 1.1902380714238083,
230
+ "original_max_position_embeddings": 8192,
231
+ "short_factor": [
232
+ 1.02,
233
+ 1.02,
234
+ 1.05,
235
+ 1.05,
236
+ 1.06,
237
+ 1.08,
238
+ 1.08,
239
+ 1.08,
240
+ 1.08,
241
+ 1.12,
242
+ 1.1800000000000002,
243
+ 1.1900000000000002,
244
+ 1.1900000000000002,
245
+ 1.2100000000000002,
246
+ 1.2300000000000002,
247
+ 1.2400000000000002,
248
+ 1.2400000000000002,
249
+ 1.2500000000000002,
250
+ 1.3000000000000003,
251
+ 1.3100000000000003,
252
+ 1.4600000000000004,
253
+ 1.5100000000000005,
254
+ 1.7000000000000006,
255
+ 1.9300000000000008,
256
+ 2.080000000000001,
257
+ 2.4399999999999933,
258
+ 3.2199999999999767,
259
+ 3.4499999999999718,
260
+ 3.579999999999969,
261
+ 4.669999999999946,
262
+ 4.779999999999943,
263
+ 5.999999999999917,
264
+ 6.009999999999917,
265
+ 6.4199999999999084,
266
+ 6.619999999999904,
267
+ 7.189999999999892,
268
+ 7.3099999999998895,
269
+ 7.339999999999889,
270
+ 7.479999999999886,
271
+ 9.749999999999837,
272
+ 10.919999999999812,
273
+ 11.219999999999805,
274
+ 11.749999999999794,
275
+ 11.979999999999789,
276
+ 13.239999999999762,
277
+ 13.579999999999755,
278
+ 13.669999999999753,
279
+ 13.82999999999975,
280
+ 14.009999999999746,
281
+ 14.679999999999731,
282
+ 14.889999999999727,
283
+ 15.769999999999708,
284
+ 15.769999999999708,
285
+ 15.819999999999707,
286
+ 15.839999999999707,
287
+ 15.919999999999705,
288
+ 16.029999999999703,
289
+ 16.12999999999972,
290
+ 16.44999999999977,
291
+ 16.44999999999977,
292
+ 16.77999999999982,
293
+ 16.83999999999983,
294
+ 16.83999999999983,
295
+ 16.889999999999837
296
+ ],
297
+ "short_mscale": 1.0,
298
+ "type": "su"
299
+ },
300
+ "torch_dtype": "float16",
301
+ "transformers_version": "4.43.1",
302
+ "use_cache": true,
303
+ "vocab_size": 100352
304
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 100257,
4
+ "eos_token_id": [
5
+ 100257,
6
+ 100266
7
+ ],
8
+ "transformers_version": "4.43.1"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8949e40abcea32801640b0abcf6addb74ee6353980106675673868fdc80f7c2d
3
+ size 4455372080
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>"
5
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": "f80aaa30bfc64c2b8ab214b541d9050e97163bc4",
3
+ "_from_auto": true,
4
+ "added_tokens_decoder": {},
5
+ "auto_map": {
6
+ "AutoTokenizer": [
7
+ "tokenization_phi3_small.Phi3SmallTokenizer",
8
+ "tokenization_phi3_small.Phi3SmallTokenizer"
9
+ ]
10
+ },
11
+ "bos_token": "<|endoftext|>",
12
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
13
+ "clean_up_tokenization_spaces": true,
14
+ "eos_token": "<|endoftext|>",
15
+ "model_max_length": 131072,
16
+ "pad_token": "<|endoftext|>",
17
+ "tokenizer_class": "Phi3SmallTokenizer",
18
+ "trust_remote_code": true
19
+ }