riczhou committed on
Commit
0ae60e0
1 Parent(s): 217a59a

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
logs.txt ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/135 [00:00<?, ?it/s]
1
 
 
2
  0%| | 0/135 [00:00<?, ?it/s]
3
 
 
4
  0%| | 0/135 [00:03<?, ?it/s]
5
  1%| | 1/135 [00:04<09:19, 4.17s/it]
6
 
 
7
  1%| | 1/135 [00:04<09:19, 4.17s/it]
8
  1%|▏ | 2/135 [00:04<04:37, 2.09s/it]
9
 
 
10
  1%|▏ | 2/135 [00:04<04:37, 2.09s/it]
11
 
 
12
  1%|▏ | 2/135 [00:04<04:37, 2.09s/it]
13
 
 
14
  1%|▏ | 2/135 [00:04<04:37, 2.09s/it]
15
  4%|▎ | 5/135 [00:05<01:27, 1.48it/s]
16
 
 
17
  4%|▎ | 5/135 [00:05<01:27, 1.48it/s]
18
 
 
19
  4%|▎ | 5/135 [00:05<01:27, 1.48it/s]
20
  5%|▌ | 7/135 [00:05<00:54, 2.36it/s]
21
 
 
22
  5%|▌ | 7/135 [00:05<00:54, 2.36it/s]
23
 
 
24
  5%|▌ | 7/135 [00:05<00:54, 2.36it/s]
25
 
 
26
  5%|▌ | 7/135 [00:05<00:54, 2.36it/s]
27
  7%|▋ | 10/135 [00:05<00:30, 4.08it/s]
28
 
 
29
  7%|▋ | 10/135 [00:05<00:30, 4.08it/s]
30
 
 
31
  7%|▋ | 10/135 [00:05<00:30, 4.08it/s]
32
  9%|▉ | 12/135 [00:05<00:24, 4.93it/s]
33
 
 
34
  9%|▉ | 12/135 [00:05<00:24, 4.93it/s]
35
 
 
36
  9%|▉ | 12/135 [00:05<00:24, 4.93it/s]
37
  10%|█ | 14/135 [00:05<00:19, 6.36it/s]
38
 
 
39
  10%|█ | 14/135 [00:05<00:19, 6.36it/s]
40
 
 
41
  10%|█ | 14/135 [00:05<00:19, 6.36it/s]
42
 
 
43
  10%|█ | 14/135 [00:05<00:19, 6.36it/s]
44
  13%|█▎ | 17/135 [00:05<00:15, 7.70it/s]
45
 
 
46
  13%|█▎ | 17/135 [00:05<00:15, 7.70it/s]
47
 
 
48
  13%|█▎ | 17/135 [00:05<00:15, 7.70it/s]
49
 
 
50
  13%|█▎ | 17/135 [00:06<00:15, 7.70it/s]
51
  15%|█▍ | 20/135 [00:06<00:11, 10.40it/s]
52
 
 
53
  15%|█▍ | 20/135 [00:06<00:11, 10.40it/s]
54
 
 
55
  15%|█▍ | 20/135 [00:06<00:11, 10.40it/s]
56
 
 
57
  15%|█▍ | 20/135 [00:06<00:11, 10.40it/s]
58
  17%|█▋ | 23/135 [00:06<00:11, 10.16it/s]
59
 
 
60
  17%|█▋ | 23/135 [00:06<00:11, 10.16it/s]
61
 
 
62
  17%|█▋ | 23/135 [00:06<00:11, 10.16it/s]
63
 
 
64
  17%|█▋ | 23/135 [00:06<00:11, 10.16it/s]
65
  19%|█▉ | 26/135 [00:06<00:08, 12.86it/s]
66
 
 
67
  19%|█▉ | 26/135 [00:06<00:08, 12.86it/s]
68
 
 
69
  19%|█▉ | 26/135 [00:06<00:08, 12.86it/s]
70
 
 
71
  19%|█▉ | 26/135 [00:06<00:08, 12.86it/s]
72
  21%|██▏ | 29/135 [00:06<00:08, 12.28it/s]
73
 
 
74
  21%|██▏ | 29/135 [00:06<00:08, 12.28it/s]
75
 
 
76
  21%|██▏ | 29/135 [00:06<00:08, 12.28it/s]
77
 
 
78
  21%|██▏ | 29/135 [00:06<00:08, 12.28it/s]
79
  24%|██▎ | 32/135 [00:06<00:06, 14.78it/s]
80
 
 
81
  24%|██▎ | 32/135 [00:06<00:06, 14.78it/s]
82
 
 
83
  24%|██▎ | 32/135 [00:06<00:06, 14.78it/s]
84
 
 
85
  24%|██▎ | 32/135 [00:07<00:06, 14.78it/s]
86
  26%|██▌ | 35/135 [00:07<00:07, 12.60it/s]
87
 
 
88
  26%|██▌ | 35/135 [00:07<00:07, 12.60it/s]
89
 
 
90
  26%|██▌ | 35/135 [00:07<00:07, 12.60it/s]
91
 
 
92
  26%|██▌ | 35/135 [00:07<00:07, 12.60it/s]
93
  28%|██▊ | 38/135 [00:07<00:06, 15.07it/s]
94
 
 
95
  28%|██▊ | 38/135 [00:07<00:06, 15.07it/s]
96
 
 
97
  28%|██▊ | 38/135 [00:07<00:06, 15.07it/s]
98
 
 
99
  28%|██▊ | 38/135 [00:07<00:06, 15.07it/s]
100
  30%|███ | 41/135 [00:07<00:07, 13.33it/s]
101
 
 
102
  30%|███ | 41/135 [00:07<00:07, 13.33it/s]
103
 
 
104
  30%|███ | 41/135 [00:07<00:07, 13.33it/s]
105
 
 
106
  30%|███ | 41/135 [00:07<00:07, 13.33it/s]
107
  33%|███▎ | 44/135 [00:07<00:05, 15.58it/s]
108
 
 
109
  33%|███▎ | 44/135 [00:07<00:05, 15.58it/s]
110
 
 
111
  33%|███▎ | 44/135 [00:07<00:05, 15.58it/s]
112
 
 
113
  33%|███▎ | 44/135 [00:07<00:05, 15.58it/s]
114
  35%|███▍ | 47/135 [00:08<00:06, 13.02it/s]
115
 
 
116
  35%|███▍ | 47/135 [00:08<00:06, 13.02it/s]
117
 
 
118
  35%|███▍ | 47/135 [00:08<00:06, 13.02it/s]
119
 
 
120
  35%|███▍ | 47/135 [00:08<00:06, 13.02it/s]
121
  37%|███▋ | 50/135 [00:08<00:05, 15.39it/s]
122
 
 
123
  37%|███▋ | 50/135 [00:08<00:05, 15.39it/s]
124
 
 
125
  37%|███▋ | 50/135 [00:08<00:05, 15.39it/s]
126
 
 
127
  37%|███▋ | 50/135 [00:08<00:05, 15.39it/s]
128
  39%|███▉ | 53/135 [00:08<00:05, 13.68it/s]
129
 
 
130
  39%|███▉ | 53/135 [00:08<00:05, 13.68it/s]
131
 
 
132
  39%|███▉ | 53/135 [00:08<00:05, 13.68it/s]
133
 
 
134
  39%|███▉ | 53/135 [00:08<00:05, 13.68it/s]
135
  41%|████▏ | 56/135 [00:08<00:04, 15.94it/s]
136
 
 
137
  41%|████▏ | 56/135 [00:08<00:04, 15.94it/s]
138
 
 
139
  41%|████▏ | 56/135 [00:08<00:04, 15.94it/s]
140
 
 
141
  41%|████▏ | 56/135 [00:08<00:04, 15.94it/s]
142
  44%|████▎ | 59/135 [00:08<00:05, 13.32it/s]
143
 
 
144
  44%|████▎ | 59/135 [00:08<00:05, 13.32it/s]
145
 
 
146
  44%|████▎ | 59/135 [00:08<00:05, 13.32it/s]
147
 
 
148
  44%|████▎ | 59/135 [00:08<00:05, 13.32it/s]
149
  46%|████▌ | 62/135 [00:08<00:04, 15.85it/s]
150
 
 
151
  46%|████▌ | 62/135 [00:08<00:04, 15.85it/s]
152
 
 
153
  46%|████▌ | 62/135 [00:08<00:04, 15.85it/s]
154
 
 
155
  46%|████▌ | 62/135 [00:09<00:04, 15.85it/s]
156
  48%|████▊ | 65/135 [00:09<00:05, 13.89it/s]
157
 
 
158
  48%|████▊ | 65/135 [00:09<00:05, 13.89it/s]
159
 
 
160
  48%|████▊ | 65/135 [00:09<00:05, 13.89it/s]
161
 
 
162
  48%|████▊ | 65/135 [00:09<00:05, 13.89it/s]
163
  50%|█████ | 68/135 [00:09<00:04, 16.19it/s]
164
 
 
165
  50%|█████ | 68/135 [00:09<00:04, 16.19it/s]
166
 
 
167
  50%|█████ | 68/135 [00:09<00:04, 16.19it/s]
168
 
 
169
  50%|█████ | 68/135 [00:09<00:04, 16.19it/s]
170
  53%|█████▎ | 71/135 [00:09<00:04, 13.33it/s]
171
 
 
172
  53%|█████▎ | 71/135 [00:09<00:04, 13.33it/s]
173
 
 
174
  53%|█████▎ | 71/135 [00:09<00:04, 13.33it/s]
175
 
 
176
  53%|█████▎ | 71/135 [00:09<00:04, 13.33it/s]
177
  55%|█████▍ | 74/135 [00:09<00:03, 15.75it/s]
178
 
 
179
  55%|█████▍ | 74/135 [00:09<00:03, 15.75it/s]
180
 
 
181
  55%|█████▍ | 74/135 [00:09<00:03, 15.75it/s]
182
 
 
183
  55%|█████▍ | 74/135 [00:09<00:03, 15.75it/s]
184
  57%|█████▋ | 77/135 [00:10<00:04, 13.94it/s]
185
 
 
186
  57%|█████▋ | 77/135 [00:10<00:04, 13.94it/s]
187
 
 
188
  57%|█████▋ | 77/135 [00:10<00:04, 13.94it/s]
189
 
 
190
  57%|█████▋ | 77/135 [00:10<00:04, 13.94it/s]
191
  59%|█████▉ | 80/135 [00:10<00:03, 16.18it/s]
192
 
 
193
  59%|█████▉ | 80/135 [00:10<00:03, 16.18it/s]
194
 
 
195
  59%|█████▉ | 80/135 [00:10<00:03, 16.18it/s]
196
 
 
197
  59%|█████▉ | 80/135 [00:10<00:03, 16.18it/s]
198
  61%|██████▏ | 83/135 [00:10<00:03, 13.47it/s]
199
 
 
200
  61%|██████▏ | 83/135 [00:10<00:03, 13.47it/s]
201
 
 
202
  61%|██████▏ | 83/135 [00:10<00:03, 13.47it/s]
203
 
 
204
  61%|██████▏ | 83/135 [00:10<00:03, 13.47it/s]
205
  64%|██████▎ | 86/135 [00:10<00:03, 15.99it/s]
206
 
 
207
  64%|██████▎ | 86/135 [00:10<00:03, 15.99it/s]
208
 
 
209
  64%|██████▎ | 86/135 [00:10<00:03, 15.99it/s]
210
 
 
211
  64%|██████▎ | 86/135 [00:10<00:03, 15.99it/s]
212
  66%|██████▌ | 89/135 [00:10<00:03, 13.88it/s]
213
 
 
214
  66%|██████▌ | 89/135 [00:10<00:03, 13.88it/s]
215
 
 
216
  66%|██████▌ | 89/135 [00:10<00:03, 13.88it/s]
217
 
 
218
  66%|██████▌ | 89/135 [00:10<00:03, 13.88it/s]
219
  68%|██████▊ | 92/135 [00:10<00:02, 16.18it/s]
220
 
 
221
  68%|██████▊ | 92/135 [00:10<00:02, 16.18it/s]
222
 
 
223
  68%|██████▊ | 92/135 [00:10<00:02, 16.18it/s]
224
 
 
225
  68%|██████▊ | 92/135 [00:11<00:02, 16.18it/s]
226
  70%|███████ | 95/135 [00:11<00:02, 13.39it/s]
227
 
 
228
  70%|███████ | 95/135 [00:11<00:02, 13.39it/s]
229
 
 
230
  70%|███████ | 95/135 [00:11<00:02, 13.39it/s]
231
 
 
232
  70%|███████ | 95/135 [00:11<00:02, 13.39it/s]
233
  73%|███████▎ | 98/135 [00:11<00:02, 15.88it/s]
234
 
 
235
  73%|███████▎ | 98/135 [00:11<00:02, 15.88it/s]
236
 
 
237
  73%|███████▎ | 98/135 [00:11<00:02, 15.88it/s]
238
 
 
239
  73%|███████▎ | 98/135 [00:11<00:02, 15.88it/s]
240
  75%|███████▍ | 101/135 [00:11<00:02, 14.11it/s]
241
 
 
242
  75%|███████▍ | 101/135 [00:11<00:02, 14.11it/s]
243
 
 
244
  75%|███████▍ | 101/135 [00:11<00:02, 14.11it/s]
245
 
 
246
  75%|███████▍ | 101/135 [00:11<00:02, 14.11it/s]
247
  77%|███████▋ | 104/135 [00:11<00:01, 16.40it/s]
248
 
 
249
  77%|███████▋ | 104/135 [00:11<00:01, 16.40it/s]
250
 
 
251
  77%|███████▋ | 104/135 [00:11<00:01, 16.40it/s]
252
 
 
253
  77%|███████▋ | 104/135 [00:11<00:01, 16.40it/s]
254
  79%|███████▉ | 107/135 [00:12<00:02, 13.62it/s]
255
 
 
256
  79%|███████▉ | 107/135 [00:12<00:02, 13.62it/s]
257
 
 
258
  79%|███████▉ | 107/135 [00:12<00:02, 13.62it/s]
259
 
 
260
  79%|███████▉ | 107/135 [00:12<00:02, 13.62it/s]
261
  81%|████████▏ | 110/135 [00:12<00:01, 16.17it/s]
262
 
 
263
  81%|████████▏ | 110/135 [00:12<00:01, 16.17it/s]
264
 
 
265
  81%|████████▏ | 110/135 [00:12<00:01, 16.17it/s]
266
 
 
267
  81%|████████▏ | 110/135 [00:12<00:01, 16.17it/s]
268
  84%|████████▎ | 113/135 [00:12<00:01, 14.29it/s]
269
 
 
270
  84%|████████▎ | 113/135 [00:12<00:01, 14.29it/s]
271
 
 
272
  84%|████████▎ | 113/135 [00:12<00:01, 14.29it/s]
273
 
 
274
  84%|████████▎ | 113/135 [00:12<00:01, 14.29it/s]
275
  86%|████████▌ | 116/135 [00:12<00:01, 16.64it/s]
276
 
 
277
  86%|████████▌ | 116/135 [00:12<00:01, 16.64it/s]
278
 
 
279
  86%|████████▌ | 116/135 [00:12<00:01, 16.64it/s]
280
 
 
281
  86%|████████▌ | 116/135 [00:12<00:01, 16.64it/s]
282
  88%|████████▊ | 119/135 [00:12<00:01, 13.75it/s]
283
 
 
284
  88%|████████▊ | 119/135 [00:12<00:01, 13.75it/s]
285
 
 
286
  88%|████████▊ | 119/135 [00:12<00:01, 13.75it/s]
287
 
 
288
  88%|████████▊ | 119/135 [00:12<00:01, 13.75it/s]
289
  90%|█████████ | 122/135 [00:12<00:00, 16.30it/s]
290
 
 
291
  90%|█████████ | 122/135 [00:12<00:00, 16.30it/s]
292
 
 
293
  90%|█████████ | 122/135 [00:12<00:00, 16.30it/s]
294
 
 
295
  90%|█████████ | 122/135 [00:13<00:00, 16.30it/s]
296
  93%|█████████▎| 125/135 [00:13<00:00, 14.36it/s]
297
 
 
298
  93%|█████████▎| 125/135 [00:13<00:00, 14.36it/s]
299
 
 
300
  93%|█████████▎| 125/135 [00:13<00:00, 14.36it/s]
301
 
 
302
  93%|█████████▎| 125/135 [00:13<00:00, 14.36it/s]
303
  95%|█████████▍| 128/135 [00:13<00:00, 16.75it/s]
304
 
 
305
  95%|█████████▍| 128/135 [00:13<00:00, 16.75it/s]
306
 
 
307
  95%|█████████▍| 128/135 [00:13<00:00, 16.75it/s]
308
 
 
309
  95%|█████████▍| 128/135 [00:13<00:00, 16.75it/s]
310
  97%|█████████▋| 131/135 [00:13<00:00, 13.76it/s]
311
 
 
312
  97%|█████████▋| 131/135 [00:13<00:00, 13.76it/s]
313
 
 
314
  97%|█████████▋| 131/135 [00:13<00:00, 13.76it/s]
315
 
 
316
  97%|█████████▋| 131/135 [00:13<00:00, 13.76it/s]
317
  99%|█████████▉| 134/135 [00:13<00:00, 16.33it/s]
318
 
 
319
  99%|█████████▉| 134/135 [00:13<00:00, 16.33it/s]
 
 
 
 
 
 
 
 
 
 
1
+ /opt/conda/envs/py310/bin/python -m mlc_llm gen_config /models/TinyLlama-1.1B-Chat-v1.0 --quantization q0f16 --conv-template chatml --output /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC
2
+ [2024-06-04 20:08:25] INFO auto_config.py:116: Found model configuration: /models/TinyLlama-1.1B-Chat-v1.0/config.json
3
+ [2024-06-04 20:08:25] INFO auto_config.py:154: Found model type: llama. Use `--model-type` to override.
4
+ [2024-06-04 20:08:25] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (2048)
5
+ [2024-06-04 20:08:25] INFO llama_model.py:72: prefill_chunk_size defaults to 2048
6
+ [2024-06-04 20:08:25] INFO config.py:107: Overriding max_batch_size from 1 to 80
7
+ [2024-06-04 20:08:25] INFO gen_config.py:143: [generation_config.json] Setting bos_token_id: 1
8
+ [2024-06-04 20:08:25] INFO gen_config.py:143: [generation_config.json] Setting eos_token_id: 2
9
+ [2024-06-04 20:08:25] INFO gen_config.py:143: [generation_config.json] Setting pad_token_id: 0
10
+ [2024-06-04 20:08:25] INFO gen_config.py:155: Found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/tokenizer.model. Copying to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC/tokenizer.model
11
+ [2024-06-04 20:08:25] INFO gen_config.py:155: Found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/tokenizer.json. Copying to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC/tokenizer.json
12
+ [2024-06-04 20:08:25] INFO gen_config.py:157: Not found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/vocab.json
13
+ [2024-06-04 20:08:25] INFO gen_config.py:157: Not found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/merges.txt
14
+ [2024-06-04 20:08:25] INFO gen_config.py:157: Not found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/added_tokens.json
15
+ [2024-06-04 20:08:25] INFO gen_config.py:155: Found tokenizer config: /models/TinyLlama-1.1B-Chat-v1.0/tokenizer_config.json. Copying to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC/tokenizer_config.json
16
+ [2024-06-04 20:08:25] INFO gen_config.py:216: Detected tokenizer info: {'token_postproc_method': 'byte_fallback', 'prepend_space_in_encode': True, 'strip_space_in_decode': True}
17
+ [2024-06-04 20:08:25] INFO gen_config.py:32: [System default] Setting temperature: 1.0
18
+ [2024-06-04 20:08:25] INFO gen_config.py:32: [System default] Setting presence_penalty: 0.0
19
+ [2024-06-04 20:08:25] INFO gen_config.py:32: [System default] Setting frequency_penalty: 0.0
20
+ [2024-06-04 20:08:25] INFO gen_config.py:32: [System default] Setting repetition_penalty: 1.0
21
+ [2024-06-04 20:08:25] INFO gen_config.py:32: [System default] Setting top_p: 1.0
22
+ [2024-06-04 20:08:25] INFO gen_config.py:223: Dumping configuration file to: /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC/mlc-chat-config.json
23
+ /opt/conda/envs/py310/bin/python -m mlc_llm convert_weight /models/TinyLlama-1.1B-Chat-v1.0 --quantization q0f16 --output /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC
24
+ [2024-06-04 20:08:27] INFO auto_config.py:116: Found model configuration: /models/TinyLlama-1.1B-Chat-v1.0/config.json
25
+ [2024-06-04 20:08:28] INFO auto_device.py:79: Found device: cuda:0
26
+ [2024-06-04 20:08:30] INFO auto_device.py:88: Not found device: rocm:0
27
+ [2024-06-04 20:08:31] INFO auto_device.py:88: Not found device: metal:0
28
+ [2024-06-04 20:08:33] INFO auto_device.py:79: Found device: vulkan:0
29
+ [2024-06-04 20:08:33] INFO auto_device.py:79: Found device: vulkan:1
30
+ [2024-06-04 20:08:33] INFO auto_device.py:79: Found device: vulkan:2
31
+ [2024-06-04 20:08:33] INFO auto_device.py:79: Found device: vulkan:3
32
+ [2024-06-04 20:08:34] INFO auto_device.py:88: Not found device: opencl:0
33
+ [2024-06-04 20:08:34] INFO auto_device.py:35: Using device: cuda:0
34
+ [2024-06-04 20:08:34] INFO auto_weight.py:71: Finding weights in: /models/TinyLlama-1.1B-Chat-v1.0
35
+ [2024-06-04 20:08:34] INFO auto_weight.py:137: Not found Huggingface PyTorch
36
+ [2024-06-04 20:08:34] INFO auto_weight.py:144: Found source weight format: huggingface-safetensor. Source configuration: /models/TinyLlama-1.1B-Chat-v1.0/model.safetensors.index.json
37
+ [2024-06-04 20:08:34] INFO auto_weight.py:107: Using source weight configuration: /models/TinyLlama-1.1B-Chat-v1.0/model.safetensors.index.json. Use `--source` to override.
38
+ [2024-06-04 20:08:34] INFO auto_weight.py:111: Using source weight format: huggingface-safetensor. Use `--source-format` to override.
39
+ [2024-06-04 20:08:34] INFO auto_config.py:154: Found model type: llama. Use `--model-type` to override.
40
+ [2024-06-04 20:08:34] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (2048)
41
+ [2024-06-04 20:08:34] INFO llama_model.py:72: prefill_chunk_size defaults to 2048
42
+ Weight conversion with arguments:
43
+ --config /models/TinyLlama-1.1B-Chat-v1.0/config.json
44
+ --quantization NoQuantize(name='q0f16', kind='no-quant', model_dtype='float16')
45
+ --model-type llama
46
+ --device cuda:0
47
+ --source /models/TinyLlama-1.1B-Chat-v1.0/model.safetensors.index.json
48
+ --source-format huggingface-safetensor
49
+ --output /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC
50
+ Start storing to cache /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC
51
+
52
  0%| | 0/135 [00:00<?, ?it/s]
53
 
54
+
55
  0%| | 0/135 [00:00<?, ?it/s]
56
 
57
+
58
  0%| | 0/135 [00:03<?, ?it/s]
59
  1%| | 1/135 [00:04<09:19, 4.17s/it]
60
 
61
+
62
  1%| | 1/135 [00:04<09:19, 4.17s/it]
63
  1%|▏ | 2/135 [00:04<04:37, 2.09s/it]
64
 
65
+
66
  1%|▏ | 2/135 [00:04<04:37, 2.09s/it]
67
 
68
+
69
  1%|▏ | 2/135 [00:04<04:37, 2.09s/it]
70
 
71
+
72
  1%|▏ | 2/135 [00:04<04:37, 2.09s/it]
73
  4%|▎ | 5/135 [00:05<01:27, 1.48it/s]
74
 
75
+
76
  4%|▎ | 5/135 [00:05<01:27, 1.48it/s]
77
 
78
+
79
  4%|▎ | 5/135 [00:05<01:27, 1.48it/s]
80
  5%|▌ | 7/135 [00:05<00:54, 2.36it/s]
81
 
82
+
83
  5%|▌ | 7/135 [00:05<00:54, 2.36it/s]
84
 
85
+
86
  5%|▌ | 7/135 [00:05<00:54, 2.36it/s]
87
 
88
+
89
  5%|▌ | 7/135 [00:05<00:54, 2.36it/s]
90
  7%|▋ | 10/135 [00:05<00:30, 4.08it/s]
91
 
92
+
93
  7%|▋ | 10/135 [00:05<00:30, 4.08it/s]
94
 
95
+
96
  7%|▋ | 10/135 [00:05<00:30, 4.08it/s]
97
  9%|▉ | 12/135 [00:05<00:24, 4.93it/s]
98
 
99
+
100
  9%|▉ | 12/135 [00:05<00:24, 4.93it/s]
101
 
102
+
103
  9%|▉ | 12/135 [00:05<00:24, 4.93it/s]
104
  10%|█ | 14/135 [00:05<00:19, 6.36it/s]
105
 
106
+
107
  10%|█ | 14/135 [00:05<00:19, 6.36it/s]
108
 
109
+
110
  10%|█ | 14/135 [00:05<00:19, 6.36it/s]
111
 
112
+
113
  10%|█ | 14/135 [00:05<00:19, 6.36it/s]
114
  13%|█▎ | 17/135 [00:05<00:15, 7.70it/s]
115
 
116
+
117
  13%|█▎ | 17/135 [00:05<00:15, 7.70it/s]
118
 
119
+
120
  13%|█▎ | 17/135 [00:05<00:15, 7.70it/s]
121
 
122
+
123
  13%|█▎ | 17/135 [00:06<00:15, 7.70it/s]
124
  15%|█▍ | 20/135 [00:06<00:11, 10.40it/s]
125
 
126
+
127
  15%|█▍ | 20/135 [00:06<00:11, 10.40it/s]
128
 
129
+
130
  15%|█▍ | 20/135 [00:06<00:11, 10.40it/s]
131
 
132
+
133
  15%|█▍ | 20/135 [00:06<00:11, 10.40it/s]
134
  17%|█▋ | 23/135 [00:06<00:11, 10.16it/s]
135
 
136
+
137
  17%|█▋ | 23/135 [00:06<00:11, 10.16it/s]
138
 
139
+
140
  17%|█▋ | 23/135 [00:06<00:11, 10.16it/s]
141
 
142
+
143
  17%|█▋ | 23/135 [00:06<00:11, 10.16it/s]
144
  19%|█▉ | 26/135 [00:06<00:08, 12.86it/s]
145
 
146
+
147
  19%|█▉ | 26/135 [00:06<00:08, 12.86it/s]
148
 
149
+
150
  19%|█▉ | 26/135 [00:06<00:08, 12.86it/s]
151
 
152
+
153
  19%|█▉ | 26/135 [00:06<00:08, 12.86it/s]
154
  21%|██▏ | 29/135 [00:06<00:08, 12.28it/s]
155
 
156
+
157
  21%|██▏ | 29/135 [00:06<00:08, 12.28it/s]
158
 
159
+
160
  21%|██▏ | 29/135 [00:06<00:08, 12.28it/s]
161
 
162
+
163
  21%|██▏ | 29/135 [00:06<00:08, 12.28it/s]
164
  24%|██▎ | 32/135 [00:06<00:06, 14.78it/s]
165
 
166
+
167
  24%|██▎ | 32/135 [00:06<00:06, 14.78it/s]
168
 
169
+
170
  24%|██▎ | 32/135 [00:06<00:06, 14.78it/s]
171
 
172
+
173
  24%|██▎ | 32/135 [00:07<00:06, 14.78it/s]
174
  26%|██▌ | 35/135 [00:07<00:07, 12.60it/s]
175
 
176
+
177
  26%|██▌ | 35/135 [00:07<00:07, 12.60it/s]
178
 
179
+
180
  26%|██▌ | 35/135 [00:07<00:07, 12.60it/s]
181
 
182
+
183
  26%|██▌ | 35/135 [00:07<00:07, 12.60it/s]
184
  28%|██▊ | 38/135 [00:07<00:06, 15.07it/s]
185
 
186
+
187
  28%|██▊ | 38/135 [00:07<00:06, 15.07it/s]
188
 
189
+
190
  28%|██▊ | 38/135 [00:07<00:06, 15.07it/s]
191
 
192
+
193
  28%|██▊ | 38/135 [00:07<00:06, 15.07it/s]
194
  30%|███ | 41/135 [00:07<00:07, 13.33it/s]
195
 
196
+
197
  30%|███ | 41/135 [00:07<00:07, 13.33it/s]
198
 
199
+
200
  30%|███ | 41/135 [00:07<00:07, 13.33it/s]
201
 
202
+
203
  30%|███ | 41/135 [00:07<00:07, 13.33it/s]
204
  33%|███▎ | 44/135 [00:07<00:05, 15.58it/s]
205
 
206
+
207
  33%|███▎ | 44/135 [00:07<00:05, 15.58it/s]
208
 
209
+
210
  33%|███▎ | 44/135 [00:07<00:05, 15.58it/s]
211
 
212
+
213
  33%|███▎ | 44/135 [00:07<00:05, 15.58it/s]
214
  35%|███▍ | 47/135 [00:08<00:06, 13.02it/s]
215
 
216
+
217
  35%|███▍ | 47/135 [00:08<00:06, 13.02it/s]
218
 
219
+
220
  35%|███▍ | 47/135 [00:08<00:06, 13.02it/s]
221
 
222
+
223
  35%|███▍ | 47/135 [00:08<00:06, 13.02it/s]
224
  37%|███▋ | 50/135 [00:08<00:05, 15.39it/s]
225
 
226
+
227
  37%|███▋ | 50/135 [00:08<00:05, 15.39it/s]
228
 
229
+
230
  37%|███▋ | 50/135 [00:08<00:05, 15.39it/s]
231
 
232
+
233
  37%|███▋ | 50/135 [00:08<00:05, 15.39it/s]
234
  39%|███▉ | 53/135 [00:08<00:05, 13.68it/s]
235
 
236
+
237
  39%|███▉ | 53/135 [00:08<00:05, 13.68it/s]
238
 
239
+
240
  39%|███▉ | 53/135 [00:08<00:05, 13.68it/s]
241
 
242
+
243
  39%|███▉ | 53/135 [00:08<00:05, 13.68it/s]
244
  41%|████▏ | 56/135 [00:08<00:04, 15.94it/s]
245
 
246
+
247
  41%|████▏ | 56/135 [00:08<00:04, 15.94it/s]
248
 
249
+
250
  41%|████▏ | 56/135 [00:08<00:04, 15.94it/s]
251
 
252
+
253
  41%|████▏ | 56/135 [00:08<00:04, 15.94it/s]
254
  44%|████▎ | 59/135 [00:08<00:05, 13.32it/s]
255
 
256
+
257
  44%|████▎ | 59/135 [00:08<00:05, 13.32it/s]
258
 
259
+
260
  44%|████▎ | 59/135 [00:08<00:05, 13.32it/s]
261
 
262
+
263
  44%|████▎ | 59/135 [00:08<00:05, 13.32it/s]
264
  46%|████▌ | 62/135 [00:08<00:04, 15.85it/s]
265
 
266
+
267
  46%|████▌ | 62/135 [00:08<00:04, 15.85it/s]
268
 
269
+
270
  46%|████▌ | 62/135 [00:08<00:04, 15.85it/s]
271
 
272
+
273
  46%|████▌ | 62/135 [00:09<00:04, 15.85it/s]
274
  48%|████▊ | 65/135 [00:09<00:05, 13.89it/s]
275
 
276
+
277
  48%|████▊ | 65/135 [00:09<00:05, 13.89it/s]
278
 
279
+
280
  48%|████▊ | 65/135 [00:09<00:05, 13.89it/s]
281
 
282
+
283
  48%|████▊ | 65/135 [00:09<00:05, 13.89it/s]
284
  50%|█████ | 68/135 [00:09<00:04, 16.19it/s]
285
 
286
+
287
  50%|█████ | 68/135 [00:09<00:04, 16.19it/s]
288
 
289
+
290
  50%|█████ | 68/135 [00:09<00:04, 16.19it/s]
291
 
292
+
293
  50%|█████ | 68/135 [00:09<00:04, 16.19it/s]
294
  53%|█████▎ | 71/135 [00:09<00:04, 13.33it/s]
295
 
296
+
297
  53%|█████▎ | 71/135 [00:09<00:04, 13.33it/s]
298
 
299
+
300
  53%|█████▎ | 71/135 [00:09<00:04, 13.33it/s]
301
 
302
+
303
  53%|█████▎ | 71/135 [00:09<00:04, 13.33it/s]
304
  55%|█████▍ | 74/135 [00:09<00:03, 15.75it/s]
305
 
306
+
307
  55%|█████▍ | 74/135 [00:09<00:03, 15.75it/s]
308
 
309
+
310
  55%|█████▍ | 74/135 [00:09<00:03, 15.75it/s]
311
 
312
+
313
  55%|█████▍ | 74/135 [00:09<00:03, 15.75it/s]
314
  57%|█████▋ | 77/135 [00:10<00:04, 13.94it/s]
315
 
316
+
317
  57%|█████▋ | 77/135 [00:10<00:04, 13.94it/s]
318
 
319
+
320
  57%|█████▋ | 77/135 [00:10<00:04, 13.94it/s]
321
 
322
+
323
  57%|█████▋ | 77/135 [00:10<00:04, 13.94it/s]
324
  59%|█████▉ | 80/135 [00:10<00:03, 16.18it/s]
325
 
326
+
327
  59%|█████▉ | 80/135 [00:10<00:03, 16.18it/s]
328
 
329
+
330
  59%|█████▉ | 80/135 [00:10<00:03, 16.18it/s]
331
 
332
+
333
  59%|█████▉ | 80/135 [00:10<00:03, 16.18it/s]
334
  61%|██████▏ | 83/135 [00:10<00:03, 13.47it/s]
335
 
336
+
337
  61%|██████▏ | 83/135 [00:10<00:03, 13.47it/s]
338
 
339
+
340
  61%|██████▏ | 83/135 [00:10<00:03, 13.47it/s]
341
 
342
+
343
  61%|██████▏ | 83/135 [00:10<00:03, 13.47it/s]
344
  64%|██████▎ | 86/135 [00:10<00:03, 15.99it/s]
345
 
346
+
347
  64%|██████▎ | 86/135 [00:10<00:03, 15.99it/s]
348
 
349
+
350
  64%|██████▎ | 86/135 [00:10<00:03, 15.99it/s]
351
 
352
+
353
  64%|██████▎ | 86/135 [00:10<00:03, 15.99it/s]
354
  66%|██████▌ | 89/135 [00:10<00:03, 13.88it/s]
355
 
356
+
357
  66%|██████▌ | 89/135 [00:10<00:03, 13.88it/s]
358
 
359
+
360
  66%|██████▌ | 89/135 [00:10<00:03, 13.88it/s]
361
 
362
+
363
  66%|██████▌ | 89/135 [00:10<00:03, 13.88it/s]
364
  68%|██████▊ | 92/135 [00:10<00:02, 16.18it/s]
365
 
366
+
367
  68%|██████▊ | 92/135 [00:10<00:02, 16.18it/s]
368
 
369
+
370
  68%|██████▊ | 92/135 [00:10<00:02, 16.18it/s]
371
 
372
+
373
  68%|██████▊ | 92/135 [00:11<00:02, 16.18it/s]
374
  70%|███████ | 95/135 [00:11<00:02, 13.39it/s]
375
 
376
+
377
  70%|███████ | 95/135 [00:11<00:02, 13.39it/s]
378
 
379
+
380
  70%|███████ | 95/135 [00:11<00:02, 13.39it/s]
381
 
382
+
383
  70%|███████ | 95/135 [00:11<00:02, 13.39it/s]
384
  73%|███████▎ | 98/135 [00:11<00:02, 15.88it/s]
385
 
386
+
387
  73%|███████▎ | 98/135 [00:11<00:02, 15.88it/s]
388
 
389
+
390
  73%|███████▎ | 98/135 [00:11<00:02, 15.88it/s]
391
 
392
+
393
  73%|███████▎ | 98/135 [00:11<00:02, 15.88it/s]
394
  75%|███████▍ | 101/135 [00:11<00:02, 14.11it/s]
395
 
396
+
397
  75%|███████▍ | 101/135 [00:11<00:02, 14.11it/s]
398
 
399
+
400
  75%|███████▍ | 101/135 [00:11<00:02, 14.11it/s]
401
 
402
+
403
  75%|███████▍ | 101/135 [00:11<00:02, 14.11it/s]
404
  77%|███████▋ | 104/135 [00:11<00:01, 16.40it/s]
405
 
406
+
407
  77%|███████▋ | 104/135 [00:11<00:01, 16.40it/s]
408
 
409
+
410
  77%|███████▋ | 104/135 [00:11<00:01, 16.40it/s]
411
 
412
+
413
  77%|███████▋ | 104/135 [00:11<00:01, 16.40it/s]
414
  79%|███████▉ | 107/135 [00:12<00:02, 13.62it/s]
415
 
416
+
417
  79%|███████▉ | 107/135 [00:12<00:02, 13.62it/s]
418
 
419
+
420
  79%|███████▉ | 107/135 [00:12<00:02, 13.62it/s]
421
 
422
+
423
  79%|███████▉ | 107/135 [00:12<00:02, 13.62it/s]
424
  81%|████████▏ | 110/135 [00:12<00:01, 16.17it/s]
425
 
426
+
427
  81%|████████▏ | 110/135 [00:12<00:01, 16.17it/s]
428
 
429
+
430
  81%|████████▏ | 110/135 [00:12<00:01, 16.17it/s]
431
 
432
+
433
  81%|████████▏ | 110/135 [00:12<00:01, 16.17it/s]
434
  84%|████████▎ | 113/135 [00:12<00:01, 14.29it/s]
435
 
436
+
437
  84%|████████▎ | 113/135 [00:12<00:01, 14.29it/s]
438
 
439
+
440
  84%|████████▎ | 113/135 [00:12<00:01, 14.29it/s]
441
 
442
+
443
  84%|████████▎ | 113/135 [00:12<00:01, 14.29it/s]
444
  86%|████████▌ | 116/135 [00:12<00:01, 16.64it/s]
445
 
446
+
447
  86%|████████▌ | 116/135 [00:12<00:01, 16.64it/s]
448
 
449
+
450
  86%|████████▌ | 116/135 [00:12<00:01, 16.64it/s]
451
 
452
+
453
  86%|████████▌ | 116/135 [00:12<00:01, 16.64it/s]
454
  88%|████████▊ | 119/135 [00:12<00:01, 13.75it/s]
455
 
456
+
457
  88%|████████▊ | 119/135 [00:12<00:01, 13.75it/s]
458
 
459
+
460
  88%|████████▊ | 119/135 [00:12<00:01, 13.75it/s]
461
 
462
+
463
  88%|████████▊ | 119/135 [00:12<00:01, 13.75it/s]
464
  90%|█████████ | 122/135 [00:12<00:00, 16.30it/s]
465
 
466
+
467
  90%|█████████ | 122/135 [00:12<00:00, 16.30it/s]
468
 
469
+
470
  90%|█████████ | 122/135 [00:12<00:00, 16.30it/s]
471
 
472
+
473
  90%|█████████ | 122/135 [00:13<00:00, 16.30it/s]
474
  93%|█████████▎| 125/135 [00:13<00:00, 14.36it/s]
475
 
476
+
477
  93%|█████████▎| 125/135 [00:13<00:00, 14.36it/s]
478
 
479
+
480
  93%|█████████▎| 125/135 [00:13<00:00, 14.36it/s]
481
 
482
+
483
  93%|█████████▎| 125/135 [00:13<00:00, 14.36it/s]
484
  95%|█████████▍| 128/135 [00:13<00:00, 16.75it/s]
485
 
486
+
487
  95%|█████████▍| 128/135 [00:13<00:00, 16.75it/s]
488
 
489
+
490
  95%|█████████▍| 128/135 [00:13<00:00, 16.75it/s]
491
 
492
+
493
  95%|█████████▍| 128/135 [00:13<00:00, 16.75it/s]
494
  97%|█████████▋| 131/135 [00:13<00:00, 13.76it/s]
495
 
496
+
497
  97%|█████████▋| 131/135 [00:13<00:00, 13.76it/s]
498
 
499
+
500
  97%|█████████▋| 131/135 [00:13<00:00, 13.76it/s]
501
 
502
+
503
  97%|█████████▋| 131/135 [00:13<00:00, 13.76it/s]
504
  99%|█████████▉| 134/135 [00:13<00:00, 16.33it/s]
505
 
506
+
507
  99%|█████████▉| 134/135 [00:13<00:00, 16.33it/s]
508
+ [2024-06-04 20:08:49] INFO huggingface_loader.py:197: Unloading HF weight file: /models/TinyLlama-1.1B-Chat-v1.0/model.safetensors
509
+ [2024-06-04 20:08:50] INFO stats.py:77: Time usage: HF loading: 3.678 sec; Pre-quantization mapping: 4.070 sec; Quantization: 0.000 sec
510
+ [2024-06-04 20:08:50] INFO stats.py:91: RAM usage: Peak RAM: 4.098 GB. Total bytes loaded from disk: 4.098 GB
511
+ [2024-06-04 20:08:50] INFO convert_weight.py:155: Parameter size after quantization: 2.049 GB
512
+ [2024-06-04 20:08:50] INFO convert_weight.py:160: Total parameters: 1,100,048,384
513
+ [2024-06-04 20:08:50] INFO convert_weight.py:161: Bits per parameter: 16.000
514
+ [2024-06-04 20:08:50] INFO convert_weight.py:166: Saved to directory: /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC
515
+
516
+ All finished, 58 total shards committed, record saved to /models/mlc-delivery/hf/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q0f16-MLC/ndarray-cache.json
mlc-chat-config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_size": 2048,
7
+ "intermediate_size": 5632,
8
+ "num_attention_heads": 32,
9
+ "num_hidden_layers": 22,
10
+ "rms_norm_eps": 1e-05,
11
+ "vocab_size": 32000,
12
+ "position_embedding_base": 10000.0,
13
+ "context_window_size": 2048,
14
+ "prefill_chunk_size": 2048,
15
+ "num_key_value_heads": 4,
16
+ "head_dim": 64,
17
+ "tensor_parallel_shards": 1,
18
+ "max_batch_size": 80
19
+ },
20
+ "vocab_size": 32000,
21
+ "context_window_size": 2048,
22
+ "sliding_window_size": -1,
23
+ "prefill_chunk_size": 2048,
24
+ "attention_sink_size": -1,
25
+ "tensor_parallel_shards": 1,
26
+ "temperature": 1.0,
27
+ "presence_penalty": 0.0,
28
+ "frequency_penalty": 0.0,
29
+ "repetition_penalty": 1.0,
30
+ "top_p": 1.0,
31
+ "tokenizer_files": [
32
+ "tokenizer.model",
33
+ "tokenizer.json",
34
+ "tokenizer_config.json"
35
+ ],
36
+ "tokenizer_info": {
37
+ "token_postproc_method": "byte_fallback",
38
+ "prepend_space_in_encode": true,
39
+ "strip_space_in_decode": true
40
+ },
41
+ "conv_template": {
42
+ "name": "chatml",
43
+ "system_template": "<|im_start|>system\n{system_message}",
44
+ "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
45
+ "system_prefix_token_ids": null,
46
+ "add_role_after_system_message": true,
47
+ "roles": {
48
+ "user": "<|im_start|>user",
49
+ "assistant": "<|im_start|>assistant"
50
+ },
51
+ "role_templates": {
52
+ "user": "{user_message}",
53
+ "assistant": "{assistant_message}",
54
+ "tool": "{tool_message}"
55
+ },
56
+ "messages": [],
57
+ "seps": [
58
+ "<|im_end|>\n"
59
+ ],
60
+ "role_content_sep": "\n",
61
+ "role_empty_sep": "\n",
62
+ "stop_str": [
63
+ "<|im_end|>"
64
+ ],
65
+ "stop_token_ids": [
66
+ 2
67
+ ],
68
+ "function_string": "",
69
+ "use_function_calling": false
70
+ },
71
+ "pad_token_id": 0,
72
+ "bos_token_id": 1,
73
+ "eos_token_id": 2
74
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,1913 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 135,
4
+ "ParamBytes": 2200096768.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 131072000,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.weight",
15
+ "shape": [
16
+ 32000,
17
+ 2048
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 131072000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "2c912eb53da81bf4ed798392c6dca9c1"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 131072000,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.weight",
34
+ "shape": [
35
+ 32000,
36
+ 2048
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 131072000,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "399f96b720d3722787e4b4c53153db38"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 46137344,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 11264,
55
+ 2048
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 46137344,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "fa34e6caf91e51df6af120a36c719b67"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 23076864,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.0.input_layernorm.weight",
72
+ "shape": [
73
+ 2048
74
+ ],
75
+ "dtype": "float16",
76
+ "format": "f32-to-bf16",
77
+ "nbytes": 4096,
78
+ "byteOffset": 0
79
+ },
80
+ {
81
+ "name": "model.layers.0.mlp.down_proj.weight",
82
+ "shape": [
83
+ 2048,
84
+ 5632
85
+ ],
86
+ "dtype": "float16",
87
+ "format": "f32-to-bf16",
88
+ "nbytes": 23068672,
89
+ "byteOffset": 4096
90
+ },
91
+ {
92
+ "name": "model.layers.0.post_attention_layernorm.weight",
93
+ "shape": [
94
+ 2048
95
+ ],
96
+ "dtype": "float16",
97
+ "format": "f32-to-bf16",
98
+ "nbytes": 4096,
99
+ "byteOffset": 23072768
100
+ }
101
+ ],
102
+ "md5sum": "665b16451cd7819f9459d97fcd4b636f"
103
+ },
104
+ {
105
+ "dataPath": "params_shard_4.bin",
106
+ "format": "raw-shard",
107
+ "nbytes": 23068672,
108
+ "records": [
109
+ {
110
+ "name": "model.layers.1.mlp.down_proj.weight",
111
+ "shape": [
112
+ 2048,
113
+ 5632
114
+ ],
115
+ "dtype": "float16",
116
+ "format": "f32-to-bf16",
117
+ "nbytes": 23068672,
118
+ "byteOffset": 0
119
+ }
120
+ ],
121
+ "md5sum": "37ef5fa415ad986ab51a66a416aa038f"
122
+ },
123
+ {
124
+ "dataPath": "params_shard_5.bin",
125
+ "format": "raw-shard",
126
+ "nbytes": 46137344,
127
+ "records": [
128
+ {
129
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
130
+ "shape": [
131
+ 11264,
132
+ 2048
133
+ ],
134
+ "dtype": "float16",
135
+ "format": "f32-to-bf16",
136
+ "nbytes": 46137344,
137
+ "byteOffset": 0
138
+ }
139
+ ],
140
+ "md5sum": "49ef987668ce623867490def5af22e42"
141
+ },
142
+ {
143
+ "dataPath": "params_shard_6.bin",
144
+ "format": "raw-shard",
145
+ "nbytes": 29368320,
146
+ "records": [
147
+ {
148
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
149
+ "shape": [
150
+ 2560,
151
+ 2048
152
+ ],
153
+ "dtype": "float16",
154
+ "format": "f32-to-bf16",
155
+ "nbytes": 10485760,
156
+ "byteOffset": 0
157
+ },
158
+ {
159
+ "name": "model.layers.0.self_attn.o_proj.weight",
160
+ "shape": [
161
+ 2048,
162
+ 2048
163
+ ],
164
+ "dtype": "float16",
165
+ "format": "f32-to-bf16",
166
+ "nbytes": 8388608,
167
+ "byteOffset": 10485760
168
+ },
169
+ {
170
+ "name": "model.layers.1.input_layernorm.weight",
171
+ "shape": [
172
+ 2048
173
+ ],
174
+ "dtype": "float16",
175
+ "format": "f32-to-bf16",
176
+ "nbytes": 4096,
177
+ "byteOffset": 18874368
178
+ },
179
+ {
180
+ "name": "model.layers.1.post_attention_layernorm.weight",
181
+ "shape": [
182
+ 2048
183
+ ],
184
+ "dtype": "float16",
185
+ "format": "f32-to-bf16",
186
+ "nbytes": 4096,
187
+ "byteOffset": 18878464
188
+ },
189
+ {
190
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
191
+ "shape": [
192
+ 2560,
193
+ 2048
194
+ ],
195
+ "dtype": "float16",
196
+ "format": "f32-to-bf16",
197
+ "nbytes": 10485760,
198
+ "byteOffset": 18882560
199
+ }
200
+ ],
201
+ "md5sum": "1c03cee852f757a5effe5031f62d898d"
202
+ },
203
+ {
204
+ "dataPath": "params_shard_7.bin",
205
+ "format": "raw-shard",
206
+ "nbytes": 46137344,
207
+ "records": [
208
+ {
209
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
210
+ "shape": [
211
+ 11264,
212
+ 2048
213
+ ],
214
+ "dtype": "float16",
215
+ "format": "f32-to-bf16",
216
+ "nbytes": 46137344,
217
+ "byteOffset": 0
218
+ }
219
+ ],
220
+ "md5sum": "e0c42e6afb9acc2f06493b4b7ea8b697"
221
+ },
222
+ {
223
+ "dataPath": "params_shard_8.bin",
224
+ "format": "raw-shard",
225
+ "nbytes": 31465472,
226
+ "records": [
227
+ {
228
+ "name": "model.layers.1.self_attn.o_proj.weight",
229
+ "shape": [
230
+ 2048,
231
+ 2048
232
+ ],
233
+ "dtype": "float16",
234
+ "format": "f32-to-bf16",
235
+ "nbytes": 8388608,
236
+ "byteOffset": 0
237
+ },
238
+ {
239
+ "name": "model.layers.10.input_layernorm.weight",
240
+ "shape": [
241
+ 2048
242
+ ],
243
+ "dtype": "float16",
244
+ "format": "f32-to-bf16",
245
+ "nbytes": 4096,
246
+ "byteOffset": 8388608
247
+ },
248
+ {
249
+ "name": "model.layers.10.mlp.down_proj.weight",
250
+ "shape": [
251
+ 2048,
252
+ 5632
253
+ ],
254
+ "dtype": "float16",
255
+ "format": "f32-to-bf16",
256
+ "nbytes": 23068672,
257
+ "byteOffset": 8392704
258
+ },
259
+ {
260
+ "name": "model.layers.10.post_attention_layernorm.weight",
261
+ "shape": [
262
+ 2048
263
+ ],
264
+ "dtype": "float16",
265
+ "format": "f32-to-bf16",
266
+ "nbytes": 4096,
267
+ "byteOffset": 31461376
268
+ }
269
+ ],
270
+ "md5sum": "3919996e166c175913687b3011a638bf"
271
+ },
272
+ {
273
+ "dataPath": "params_shard_9.bin",
274
+ "format": "raw-shard",
275
+ "nbytes": 23068672,
276
+ "records": [
277
+ {
278
+ "name": "model.layers.11.mlp.down_proj.weight",
279
+ "shape": [
280
+ 2048,
281
+ 5632
282
+ ],
283
+ "dtype": "float16",
284
+ "format": "f32-to-bf16",
285
+ "nbytes": 23068672,
286
+ "byteOffset": 0
287
+ }
288
+ ],
289
+ "md5sum": "17f611d549b98636fd7a2cc677eafaf7"
290
+ },
291
+ {
292
+ "dataPath": "params_shard_10.bin",
293
+ "format": "raw-shard",
294
+ "nbytes": 46137344,
295
+ "records": [
296
+ {
297
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
298
+ "shape": [
299
+ 11264,
300
+ 2048
301
+ ],
302
+ "dtype": "float16",
303
+ "format": "f32-to-bf16",
304
+ "nbytes": 46137344,
305
+ "byteOffset": 0
306
+ }
307
+ ],
308
+ "md5sum": "5546632bd7f9edcbbea1997360360f51"
309
+ },
310
+ {
311
+ "dataPath": "params_shard_11.bin",
312
+ "format": "raw-shard",
313
+ "nbytes": 29368320,
314
+ "records": [
315
+ {
316
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
317
+ "shape": [
318
+ 2560,
319
+ 2048
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 10485760,
324
+ "byteOffset": 0
325
+ },
326
+ {
327
+ "name": "model.layers.10.self_attn.o_proj.weight",
328
+ "shape": [
329
+ 2048,
330
+ 2048
331
+ ],
332
+ "dtype": "float16",
333
+ "format": "f32-to-bf16",
334
+ "nbytes": 8388608,
335
+ "byteOffset": 10485760
336
+ },
337
+ {
338
+ "name": "model.layers.11.input_layernorm.weight",
339
+ "shape": [
340
+ 2048
341
+ ],
342
+ "dtype": "float16",
343
+ "format": "f32-to-bf16",
344
+ "nbytes": 4096,
345
+ "byteOffset": 18874368
346
+ },
347
+ {
348
+ "name": "model.layers.11.post_attention_layernorm.weight",
349
+ "shape": [
350
+ 2048
351
+ ],
352
+ "dtype": "float16",
353
+ "format": "f32-to-bf16",
354
+ "nbytes": 4096,
355
+ "byteOffset": 18878464
356
+ },
357
+ {
358
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
359
+ "shape": [
360
+ 2560,
361
+ 2048
362
+ ],
363
+ "dtype": "float16",
364
+ "format": "f32-to-bf16",
365
+ "nbytes": 10485760,
366
+ "byteOffset": 18882560
367
+ }
368
+ ],
369
+ "md5sum": "35b557a4c14190f9b1cd4f8766a7f98d"
370
+ },
371
+ {
372
+ "dataPath": "params_shard_12.bin",
373
+ "format": "raw-shard",
374
+ "nbytes": 46137344,
375
+ "records": [
376
+ {
377
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
378
+ "shape": [
379
+ 11264,
380
+ 2048
381
+ ],
382
+ "dtype": "float16",
383
+ "format": "f32-to-bf16",
384
+ "nbytes": 46137344,
385
+ "byteOffset": 0
386
+ }
387
+ ],
388
+ "md5sum": "c5b76239603150d360630794c8d00e51"
389
+ },
390
+ {
391
+ "dataPath": "params_shard_13.bin",
392
+ "format": "raw-shard",
393
+ "nbytes": 31465472,
394
+ "records": [
395
+ {
396
+ "name": "model.layers.11.self_attn.o_proj.weight",
397
+ "shape": [
398
+ 2048,
399
+ 2048
400
+ ],
401
+ "dtype": "float16",
402
+ "format": "f32-to-bf16",
403
+ "nbytes": 8388608,
404
+ "byteOffset": 0
405
+ },
406
+ {
407
+ "name": "model.layers.12.input_layernorm.weight",
408
+ "shape": [
409
+ 2048
410
+ ],
411
+ "dtype": "float16",
412
+ "format": "f32-to-bf16",
413
+ "nbytes": 4096,
414
+ "byteOffset": 8388608
415
+ },
416
+ {
417
+ "name": "model.layers.12.mlp.down_proj.weight",
418
+ "shape": [
419
+ 2048,
420
+ 5632
421
+ ],
422
+ "dtype": "float16",
423
+ "format": "f32-to-bf16",
424
+ "nbytes": 23068672,
425
+ "byteOffset": 8392704
426
+ },
427
+ {
428
+ "name": "model.layers.12.post_attention_layernorm.weight",
429
+ "shape": [
430
+ 2048
431
+ ],
432
+ "dtype": "float16",
433
+ "format": "f32-to-bf16",
434
+ "nbytes": 4096,
435
+ "byteOffset": 31461376
436
+ }
437
+ ],
438
+ "md5sum": "eb45fd07423c9ef8bdd2594bfb82aadb"
439
+ },
440
+ {
441
+ "dataPath": "params_shard_14.bin",
442
+ "format": "raw-shard",
443
+ "nbytes": 23068672,
444
+ "records": [
445
+ {
446
+ "name": "model.layers.13.mlp.down_proj.weight",
447
+ "shape": [
448
+ 2048,
449
+ 5632
450
+ ],
451
+ "dtype": "float16",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 23068672,
454
+ "byteOffset": 0
455
+ }
456
+ ],
457
+ "md5sum": "549207afa7495fd9633b2bcb505290da"
458
+ },
459
+ {
460
+ "dataPath": "params_shard_15.bin",
461
+ "format": "raw-shard",
462
+ "nbytes": 46137344,
463
+ "records": [
464
+ {
465
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
466
+ "shape": [
467
+ 11264,
468
+ 2048
469
+ ],
470
+ "dtype": "float16",
471
+ "format": "f32-to-bf16",
472
+ "nbytes": 46137344,
473
+ "byteOffset": 0
474
+ }
475
+ ],
476
+ "md5sum": "5647a65140d6c8b4f4822d23503c8a0c"
477
+ },
478
+ {
479
+ "dataPath": "params_shard_16.bin",
480
+ "format": "raw-shard",
481
+ "nbytes": 29368320,
482
+ "records": [
483
+ {
484
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
485
+ "shape": [
486
+ 2560,
487
+ 2048
488
+ ],
489
+ "dtype": "float16",
490
+ "format": "f32-to-bf16",
491
+ "nbytes": 10485760,
492
+ "byteOffset": 0
493
+ },
494
+ {
495
+ "name": "model.layers.12.self_attn.o_proj.weight",
496
+ "shape": [
497
+ 2048,
498
+ 2048
499
+ ],
500
+ "dtype": "float16",
501
+ "format": "f32-to-bf16",
502
+ "nbytes": 8388608,
503
+ "byteOffset": 10485760
504
+ },
505
+ {
506
+ "name": "model.layers.13.input_layernorm.weight",
507
+ "shape": [
508
+ 2048
509
+ ],
510
+ "dtype": "float16",
511
+ "format": "f32-to-bf16",
512
+ "nbytes": 4096,
513
+ "byteOffset": 18874368
514
+ },
515
+ {
516
+ "name": "model.layers.13.post_attention_layernorm.weight",
517
+ "shape": [
518
+ 2048
519
+ ],
520
+ "dtype": "float16",
521
+ "format": "f32-to-bf16",
522
+ "nbytes": 4096,
523
+ "byteOffset": 18878464
524
+ },
525
+ {
526
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
527
+ "shape": [
528
+ 2560,
529
+ 2048
530
+ ],
531
+ "dtype": "float16",
532
+ "format": "f32-to-bf16",
533
+ "nbytes": 10485760,
534
+ "byteOffset": 18882560
535
+ }
536
+ ],
537
+ "md5sum": "6adfbc4e603f8276d5955c143c88de13"
538
+ },
539
+ {
540
+ "dataPath": "params_shard_17.bin",
541
+ "format": "raw-shard",
542
+ "nbytes": 46137344,
543
+ "records": [
544
+ {
545
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
546
+ "shape": [
547
+ 11264,
548
+ 2048
549
+ ],
550
+ "dtype": "float16",
551
+ "format": "f32-to-bf16",
552
+ "nbytes": 46137344,
553
+ "byteOffset": 0
554
+ }
555
+ ],
556
+ "md5sum": "208c2247e69e1af227cb0763f1585adf"
557
+ },
558
+ {
559
+ "dataPath": "params_shard_18.bin",
560
+ "format": "raw-shard",
561
+ "nbytes": 31465472,
562
+ "records": [
563
+ {
564
+ "name": "model.layers.13.self_attn.o_proj.weight",
565
+ "shape": [
566
+ 2048,
567
+ 2048
568
+ ],
569
+ "dtype": "float16",
570
+ "format": "f32-to-bf16",
571
+ "nbytes": 8388608,
572
+ "byteOffset": 0
573
+ },
574
+ {
575
+ "name": "model.layers.14.input_layernorm.weight",
576
+ "shape": [
577
+ 2048
578
+ ],
579
+ "dtype": "float16",
580
+ "format": "f32-to-bf16",
581
+ "nbytes": 4096,
582
+ "byteOffset": 8388608
583
+ },
584
+ {
585
+ "name": "model.layers.14.mlp.down_proj.weight",
586
+ "shape": [
587
+ 2048,
588
+ 5632
589
+ ],
590
+ "dtype": "float16",
591
+ "format": "f32-to-bf16",
592
+ "nbytes": 23068672,
593
+ "byteOffset": 8392704
594
+ },
595
+ {
596
+ "name": "model.layers.14.post_attention_layernorm.weight",
597
+ "shape": [
598
+ 2048
599
+ ],
600
+ "dtype": "float16",
601
+ "format": "f32-to-bf16",
602
+ "nbytes": 4096,
603
+ "byteOffset": 31461376
604
+ }
605
+ ],
606
+ "md5sum": "e5a3358dbf6572e5f29eb0574a2710f8"
607
+ },
608
+ {
609
+ "dataPath": "params_shard_19.bin",
610
+ "format": "raw-shard",
611
+ "nbytes": 23068672,
612
+ "records": [
613
+ {
614
+ "name": "model.layers.15.mlp.down_proj.weight",
615
+ "shape": [
616
+ 2048,
617
+ 5632
618
+ ],
619
+ "dtype": "float16",
620
+ "format": "f32-to-bf16",
621
+ "nbytes": 23068672,
622
+ "byteOffset": 0
623
+ }
624
+ ],
625
+ "md5sum": "3693cba59637b5c149ea328e75d65f10"
626
+ },
627
+ {
628
+ "dataPath": "params_shard_20.bin",
629
+ "format": "raw-shard",
630
+ "nbytes": 46137344,
631
+ "records": [
632
+ {
633
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
634
+ "shape": [
635
+ 11264,
636
+ 2048
637
+ ],
638
+ "dtype": "float16",
639
+ "format": "f32-to-bf16",
640
+ "nbytes": 46137344,
641
+ "byteOffset": 0
642
+ }
643
+ ],
644
+ "md5sum": "f9b6be0eef3c969c9399ec2c19a86812"
645
+ },
646
+ {
647
+ "dataPath": "params_shard_21.bin",
648
+ "format": "raw-shard",
649
+ "nbytes": 29368320,
650
+ "records": [
651
+ {
652
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
653
+ "shape": [
654
+ 2560,
655
+ 2048
656
+ ],
657
+ "dtype": "float16",
658
+ "format": "f32-to-bf16",
659
+ "nbytes": 10485760,
660
+ "byteOffset": 0
661
+ },
662
+ {
663
+ "name": "model.layers.14.self_attn.o_proj.weight",
664
+ "shape": [
665
+ 2048,
666
+ 2048
667
+ ],
668
+ "dtype": "float16",
669
+ "format": "f32-to-bf16",
670
+ "nbytes": 8388608,
671
+ "byteOffset": 10485760
672
+ },
673
+ {
674
+ "name": "model.layers.15.input_layernorm.weight",
675
+ "shape": [
676
+ 2048
677
+ ],
678
+ "dtype": "float16",
679
+ "format": "f32-to-bf16",
680
+ "nbytes": 4096,
681
+ "byteOffset": 18874368
682
+ },
683
+ {
684
+ "name": "model.layers.15.post_attention_layernorm.weight",
685
+ "shape": [
686
+ 2048
687
+ ],
688
+ "dtype": "float16",
689
+ "format": "f32-to-bf16",
690
+ "nbytes": 4096,
691
+ "byteOffset": 18878464
692
+ },
693
+ {
694
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
695
+ "shape": [
696
+ 2560,
697
+ 2048
698
+ ],
699
+ "dtype": "float16",
700
+ "format": "f32-to-bf16",
701
+ "nbytes": 10485760,
702
+ "byteOffset": 18882560
703
+ }
704
+ ],
705
+ "md5sum": "3bda08ef56d03fa0935cda8e62f577e8"
706
+ },
707
+ {
708
+ "dataPath": "params_shard_22.bin",
709
+ "format": "raw-shard",
710
+ "nbytes": 46137344,
711
+ "records": [
712
+ {
713
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
714
+ "shape": [
715
+ 11264,
716
+ 2048
717
+ ],
718
+ "dtype": "float16",
719
+ "format": "f32-to-bf16",
720
+ "nbytes": 46137344,
721
+ "byteOffset": 0
722
+ }
723
+ ],
724
+ "md5sum": "5f214b1f00df2168b293c54b93f698da"
725
+ },
726
+ {
727
+ "dataPath": "params_shard_23.bin",
728
+ "format": "raw-shard",
729
+ "nbytes": 31465472,
730
+ "records": [
731
+ {
732
+ "name": "model.layers.15.self_attn.o_proj.weight",
733
+ "shape": [
734
+ 2048,
735
+ 2048
736
+ ],
737
+ "dtype": "float16",
738
+ "format": "f32-to-bf16",
739
+ "nbytes": 8388608,
740
+ "byteOffset": 0
741
+ },
742
+ {
743
+ "name": "model.layers.16.input_layernorm.weight",
744
+ "shape": [
745
+ 2048
746
+ ],
747
+ "dtype": "float16",
748
+ "format": "f32-to-bf16",
749
+ "nbytes": 4096,
750
+ "byteOffset": 8388608
751
+ },
752
+ {
753
+ "name": "model.layers.16.mlp.down_proj.weight",
754
+ "shape": [
755
+ 2048,
756
+ 5632
757
+ ],
758
+ "dtype": "float16",
759
+ "format": "f32-to-bf16",
760
+ "nbytes": 23068672,
761
+ "byteOffset": 8392704
762
+ },
763
+ {
764
+ "name": "model.layers.16.post_attention_layernorm.weight",
765
+ "shape": [
766
+ 2048
767
+ ],
768
+ "dtype": "float16",
769
+ "format": "f32-to-bf16",
770
+ "nbytes": 4096,
771
+ "byteOffset": 31461376
772
+ }
773
+ ],
774
+ "md5sum": "02f1b4551f1f43ea9a9d3eaef08949c7"
775
+ },
776
+ {
777
+ "dataPath": "params_shard_24.bin",
778
+ "format": "raw-shard",
779
+ "nbytes": 23068672,
780
+ "records": [
781
+ {
782
+ "name": "model.layers.17.mlp.down_proj.weight",
783
+ "shape": [
784
+ 2048,
785
+ 5632
786
+ ],
787
+ "dtype": "float16",
788
+ "format": "f32-to-bf16",
789
+ "nbytes": 23068672,
790
+ "byteOffset": 0
791
+ }
792
+ ],
793
+ "md5sum": "91a33e4116fcaf5f483a6af8a5c84d06"
794
+ },
795
+ {
796
+ "dataPath": "params_shard_25.bin",
797
+ "format": "raw-shard",
798
+ "nbytes": 46137344,
799
+ "records": [
800
+ {
801
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
802
+ "shape": [
803
+ 11264,
804
+ 2048
805
+ ],
806
+ "dtype": "float16",
807
+ "format": "f32-to-bf16",
808
+ "nbytes": 46137344,
809
+ "byteOffset": 0
810
+ }
811
+ ],
812
+ "md5sum": "d9a62f0ab43e06c6ada7e8f33c0b2e9b"
813
+ },
814
+ {
815
+ "dataPath": "params_shard_26.bin",
816
+ "format": "raw-shard",
817
+ "nbytes": 29368320,
818
+ "records": [
819
+ {
820
+ "name": "model.layers.16.self_attn.qkv_proj.weight",
821
+ "shape": [
822
+ 2560,
823
+ 2048
824
+ ],
825
+ "dtype": "float16",
826
+ "format": "f32-to-bf16",
827
+ "nbytes": 10485760,
828
+ "byteOffset": 0
829
+ },
830
+ {
831
+ "name": "model.layers.16.self_attn.o_proj.weight",
832
+ "shape": [
833
+ 2048,
834
+ 2048
835
+ ],
836
+ "dtype": "float16",
837
+ "format": "f32-to-bf16",
838
+ "nbytes": 8388608,
839
+ "byteOffset": 10485760
840
+ },
841
+ {
842
+ "name": "model.layers.17.input_layernorm.weight",
843
+ "shape": [
844
+ 2048
845
+ ],
846
+ "dtype": "float16",
847
+ "format": "f32-to-bf16",
848
+ "nbytes": 4096,
849
+ "byteOffset": 18874368
850
+ },
851
+ {
852
+ "name": "model.layers.17.post_attention_layernorm.weight",
853
+ "shape": [
854
+ 2048
855
+ ],
856
+ "dtype": "float16",
857
+ "format": "f32-to-bf16",
858
+ "nbytes": 4096,
859
+ "byteOffset": 18878464
860
+ },
861
+ {
862
+ "name": "model.layers.17.self_attn.qkv_proj.weight",
863
+ "shape": [
864
+ 2560,
865
+ 2048
866
+ ],
867
+ "dtype": "float16",
868
+ "format": "f32-to-bf16",
869
+ "nbytes": 10485760,
870
+ "byteOffset": 18882560
871
+ }
872
+ ],
873
+ "md5sum": "062d11c5e83c3ab3f29441999dff582a"
874
+ },
875
+ {
876
+ "dataPath": "params_shard_27.bin",
877
+ "format": "raw-shard",
878
+ "nbytes": 46137344,
879
+ "records": [
880
+ {
881
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
882
+ "shape": [
883
+ 11264,
884
+ 2048
885
+ ],
886
+ "dtype": "float16",
887
+ "format": "f32-to-bf16",
888
+ "nbytes": 46137344,
889
+ "byteOffset": 0
890
+ }
891
+ ],
892
+ "md5sum": "12fb5e1c494c84a7956d41a25cdd1d7e"
893
+ },
894
+ {
895
+ "dataPath": "params_shard_28.bin",
896
+ "format": "raw-shard",
897
+ "nbytes": 31465472,
898
+ "records": [
899
+ {
900
+ "name": "model.layers.17.self_attn.o_proj.weight",
901
+ "shape": [
902
+ 2048,
903
+ 2048
904
+ ],
905
+ "dtype": "float16",
906
+ "format": "f32-to-bf16",
907
+ "nbytes": 8388608,
908
+ "byteOffset": 0
909
+ },
910
+ {
911
+ "name": "model.layers.18.input_layernorm.weight",
912
+ "shape": [
913
+ 2048
914
+ ],
915
+ "dtype": "float16",
916
+ "format": "f32-to-bf16",
917
+ "nbytes": 4096,
918
+ "byteOffset": 8388608
919
+ },
920
+ {
921
+ "name": "model.layers.18.mlp.down_proj.weight",
922
+ "shape": [
923
+ 2048,
924
+ 5632
925
+ ],
926
+ "dtype": "float16",
927
+ "format": "f32-to-bf16",
928
+ "nbytes": 23068672,
929
+ "byteOffset": 8392704
930
+ },
931
+ {
932
+ "name": "model.layers.18.post_attention_layernorm.weight",
933
+ "shape": [
934
+ 2048
935
+ ],
936
+ "dtype": "float16",
937
+ "format": "f32-to-bf16",
938
+ "nbytes": 4096,
939
+ "byteOffset": 31461376
940
+ }
941
+ ],
942
+ "md5sum": "7c7cb40e5739912436dffe8f5415a653"
943
+ },
944
+ {
945
+ "dataPath": "params_shard_29.bin",
946
+ "format": "raw-shard",
947
+ "nbytes": 23068672,
948
+ "records": [
949
+ {
950
+ "name": "model.layers.19.mlp.down_proj.weight",
951
+ "shape": [
952
+ 2048,
953
+ 5632
954
+ ],
955
+ "dtype": "float16",
956
+ "format": "f32-to-bf16",
957
+ "nbytes": 23068672,
958
+ "byteOffset": 0
959
+ }
960
+ ],
961
+ "md5sum": "e19364880512f131126e7385d9c32eac"
962
+ },
963
+ {
964
+ "dataPath": "params_shard_30.bin",
965
+ "format": "raw-shard",
966
+ "nbytes": 46137344,
967
+ "records": [
968
+ {
969
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
970
+ "shape": [
971
+ 11264,
972
+ 2048
973
+ ],
974
+ "dtype": "float16",
975
+ "format": "f32-to-bf16",
976
+ "nbytes": 46137344,
977
+ "byteOffset": 0
978
+ }
979
+ ],
980
+ "md5sum": "64f981313696f188e4bd7478ef039125"
981
+ },
982
+ {
983
+ "dataPath": "params_shard_31.bin",
984
+ "format": "raw-shard",
985
+ "nbytes": 29368320,
986
+ "records": [
987
+ {
988
+ "name": "model.layers.18.self_attn.qkv_proj.weight",
989
+ "shape": [
990
+ 2560,
991
+ 2048
992
+ ],
993
+ "dtype": "float16",
994
+ "format": "f32-to-bf16",
995
+ "nbytes": 10485760,
996
+ "byteOffset": 0
997
+ },
998
+ {
999
+ "name": "model.layers.18.self_attn.o_proj.weight",
1000
+ "shape": [
1001
+ 2048,
1002
+ 2048
1003
+ ],
1004
+ "dtype": "float16",
1005
+ "format": "f32-to-bf16",
1006
+ "nbytes": 8388608,
1007
+ "byteOffset": 10485760
1008
+ },
1009
+ {
1010
+ "name": "model.layers.19.input_layernorm.weight",
1011
+ "shape": [
1012
+ 2048
1013
+ ],
1014
+ "dtype": "float16",
1015
+ "format": "f32-to-bf16",
1016
+ "nbytes": 4096,
1017
+ "byteOffset": 18874368
1018
+ },
1019
+ {
1020
+ "name": "model.layers.19.post_attention_layernorm.weight",
1021
+ "shape": [
1022
+ 2048
1023
+ ],
1024
+ "dtype": "float16",
1025
+ "format": "f32-to-bf16",
1026
+ "nbytes": 4096,
1027
+ "byteOffset": 18878464
1028
+ },
1029
+ {
1030
+ "name": "model.layers.19.self_attn.qkv_proj.weight",
1031
+ "shape": [
1032
+ 2560,
1033
+ 2048
1034
+ ],
1035
+ "dtype": "float16",
1036
+ "format": "f32-to-bf16",
1037
+ "nbytes": 10485760,
1038
+ "byteOffset": 18882560
1039
+ }
1040
+ ],
1041
+ "md5sum": "432c244c8ec1a184ad618484d7cf897b"
1042
+ },
1043
+ {
1044
+ "dataPath": "params_shard_32.bin",
1045
+ "format": "raw-shard",
1046
+ "nbytes": 46137344,
1047
+ "records": [
1048
+ {
1049
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
1050
+ "shape": [
1051
+ 11264,
1052
+ 2048
1053
+ ],
1054
+ "dtype": "float16",
1055
+ "format": "f32-to-bf16",
1056
+ "nbytes": 46137344,
1057
+ "byteOffset": 0
1058
+ }
1059
+ ],
1060
+ "md5sum": "63271c6a96f05bc97bf159176e6d5589"
1061
+ },
1062
+ {
1063
+ "dataPath": "params_shard_33.bin",
1064
+ "format": "raw-shard",
1065
+ "nbytes": 31465472,
1066
+ "records": [
1067
+ {
1068
+ "name": "model.layers.19.self_attn.o_proj.weight",
1069
+ "shape": [
1070
+ 2048,
1071
+ 2048
1072
+ ],
1073
+ "dtype": "float16",
1074
+ "format": "f32-to-bf16",
1075
+ "nbytes": 8388608,
1076
+ "byteOffset": 0
1077
+ },
1078
+ {
1079
+ "name": "model.layers.2.input_layernorm.weight",
1080
+ "shape": [
1081
+ 2048
1082
+ ],
1083
+ "dtype": "float16",
1084
+ "format": "f32-to-bf16",
1085
+ "nbytes": 4096,
1086
+ "byteOffset": 8388608
1087
+ },
1088
+ {
1089
+ "name": "model.layers.2.mlp.down_proj.weight",
1090
+ "shape": [
1091
+ 2048,
1092
+ 5632
1093
+ ],
1094
+ "dtype": "float16",
1095
+ "format": "f32-to-bf16",
1096
+ "nbytes": 23068672,
1097
+ "byteOffset": 8392704
1098
+ },
1099
+ {
1100
+ "name": "model.layers.2.post_attention_layernorm.weight",
1101
+ "shape": [
1102
+ 2048
1103
+ ],
1104
+ "dtype": "float16",
1105
+ "format": "f32-to-bf16",
1106
+ "nbytes": 4096,
1107
+ "byteOffset": 31461376
1108
+ }
1109
+ ],
1110
+ "md5sum": "cadfaa50f214b92d0c35d239543c61e4"
1111
+ },
1112
+ {
1113
+ "dataPath": "params_shard_34.bin",
1114
+ "format": "raw-shard",
1115
+ "nbytes": 23068672,
1116
+ "records": [
1117
+ {
1118
+ "name": "model.layers.20.mlp.down_proj.weight",
1119
+ "shape": [
1120
+ 2048,
1121
+ 5632
1122
+ ],
1123
+ "dtype": "float16",
1124
+ "format": "f32-to-bf16",
1125
+ "nbytes": 23068672,
1126
+ "byteOffset": 0
1127
+ }
1128
+ ],
1129
+ "md5sum": "33dc7e372670369500baae0a73e738e5"
1130
+ },
1131
+ {
1132
+ "dataPath": "params_shard_35.bin",
1133
+ "format": "raw-shard",
1134
+ "nbytes": 46137344,
1135
+ "records": [
1136
+ {
1137
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1138
+ "shape": [
1139
+ 11264,
1140
+ 2048
1141
+ ],
1142
+ "dtype": "float16",
1143
+ "format": "f32-to-bf16",
1144
+ "nbytes": 46137344,
1145
+ "byteOffset": 0
1146
+ }
1147
+ ],
1148
+ "md5sum": "9c2ed598e4412ee933e673ef0606d300"
1149
+ },
1150
+ {
1151
+ "dataPath": "params_shard_36.bin",
1152
+ "format": "raw-shard",
1153
+ "nbytes": 29368320,
1154
+ "records": [
1155
+ {
1156
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
1157
+ "shape": [
1158
+ 2560,
1159
+ 2048
1160
+ ],
1161
+ "dtype": "float16",
1162
+ "format": "f32-to-bf16",
1163
+ "nbytes": 10485760,
1164
+ "byteOffset": 0
1165
+ },
1166
+ {
1167
+ "name": "model.layers.2.self_attn.o_proj.weight",
1168
+ "shape": [
1169
+ 2048,
1170
+ 2048
1171
+ ],
1172
+ "dtype": "float16",
1173
+ "format": "f32-to-bf16",
1174
+ "nbytes": 8388608,
1175
+ "byteOffset": 10485760
1176
+ },
1177
+ {
1178
+ "name": "model.layers.20.input_layernorm.weight",
1179
+ "shape": [
1180
+ 2048
1181
+ ],
1182
+ "dtype": "float16",
1183
+ "format": "f32-to-bf16",
1184
+ "nbytes": 4096,
1185
+ "byteOffset": 18874368
1186
+ },
1187
+ {
1188
+ "name": "model.layers.20.post_attention_layernorm.weight",
1189
+ "shape": [
1190
+ 2048
1191
+ ],
1192
+ "dtype": "float16",
1193
+ "format": "f32-to-bf16",
1194
+ "nbytes": 4096,
1195
+ "byteOffset": 18878464
1196
+ },
1197
+ {
1198
+ "name": "model.layers.20.self_attn.qkv_proj.weight",
1199
+ "shape": [
1200
+ 2560,
1201
+ 2048
1202
+ ],
1203
+ "dtype": "float16",
1204
+ "format": "f32-to-bf16",
1205
+ "nbytes": 10485760,
1206
+ "byteOffset": 18882560
1207
+ }
1208
+ ],
1209
+ "md5sum": "d2f3b396661e2381677d102eedf9fec2"
1210
+ },
1211
+ {
1212
+ "dataPath": "params_shard_37.bin",
1213
+ "format": "raw-shard",
1214
+ "nbytes": 46137344,
1215
+ "records": [
1216
+ {
1217
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1218
+ "shape": [
1219
+ 11264,
1220
+ 2048
1221
+ ],
1222
+ "dtype": "float16",
1223
+ "format": "f32-to-bf16",
1224
+ "nbytes": 46137344,
1225
+ "byteOffset": 0
1226
+ }
1227
+ ],
1228
+ "md5sum": "90560d68478e9aaf9ab18f7d565e2979"
1229
+ },
1230
+ {
1231
+ "dataPath": "params_shard_38.bin",
1232
+ "format": "raw-shard",
1233
+ "nbytes": 31465472,
1234
+ "records": [
1235
+ {
1236
+ "name": "model.layers.20.self_attn.o_proj.weight",
1237
+ "shape": [
1238
+ 2048,
1239
+ 2048
1240
+ ],
1241
+ "dtype": "float16",
1242
+ "format": "f32-to-bf16",
1243
+ "nbytes": 8388608,
1244
+ "byteOffset": 0
1245
+ },
1246
+ {
1247
+ "name": "model.layers.21.input_layernorm.weight",
1248
+ "shape": [
1249
+ 2048
1250
+ ],
1251
+ "dtype": "float16",
1252
+ "format": "f32-to-bf16",
1253
+ "nbytes": 4096,
1254
+ "byteOffset": 8388608
1255
+ },
1256
+ {
1257
+ "name": "model.layers.21.mlp.down_proj.weight",
1258
+ "shape": [
1259
+ 2048,
1260
+ 5632
1261
+ ],
1262
+ "dtype": "float16",
1263
+ "format": "f32-to-bf16",
1264
+ "nbytes": 23068672,
1265
+ "byteOffset": 8392704
1266
+ },
1267
+ {
1268
+ "name": "model.layers.21.post_attention_layernorm.weight",
1269
+ "shape": [
1270
+ 2048
1271
+ ],
1272
+ "dtype": "float16",
1273
+ "format": "f32-to-bf16",
1274
+ "nbytes": 4096,
1275
+ "byteOffset": 31461376
1276
+ }
1277
+ ],
1278
+ "md5sum": "f7dad919d98f25f033bdb2f1e670213e"
1279
+ },
1280
+ {
1281
+ "dataPath": "params_shard_39.bin",
1282
+ "format": "raw-shard",
1283
+ "nbytes": 23068672,
1284
+ "records": [
1285
+ {
1286
+ "name": "model.layers.3.mlp.down_proj.weight",
1287
+ "shape": [
1288
+ 2048,
1289
+ 5632
1290
+ ],
1291
+ "dtype": "float16",
1292
+ "format": "f32-to-bf16",
1293
+ "nbytes": 23068672,
1294
+ "byteOffset": 0
1295
+ }
1296
+ ],
1297
+ "md5sum": "7058d133bf8c15defd0608f5cc39ecf5"
1298
+ },
1299
+ {
1300
+ "dataPath": "params_shard_40.bin",
1301
+ "format": "raw-shard",
1302
+ "nbytes": 46137344,
1303
+ "records": [
1304
+ {
1305
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
1306
+ "shape": [
1307
+ 11264,
1308
+ 2048
1309
+ ],
1310
+ "dtype": "float16",
1311
+ "format": "f32-to-bf16",
1312
+ "nbytes": 46137344,
1313
+ "byteOffset": 0
1314
+ }
1315
+ ],
1316
+ "md5sum": "8c9470bf89d281643a4f7ecfbceded3b"
1317
+ },
1318
+ {
1319
+ "dataPath": "params_shard_41.bin",
1320
+ "format": "raw-shard",
1321
+ "nbytes": 29368320,
1322
+ "records": [
1323
+ {
1324
+ "name": "model.layers.21.self_attn.qkv_proj.weight",
1325
+ "shape": [
1326
+ 2560,
1327
+ 2048
1328
+ ],
1329
+ "dtype": "float16",
1330
+ "format": "f32-to-bf16",
1331
+ "nbytes": 10485760,
1332
+ "byteOffset": 0
1333
+ },
1334
+ {
1335
+ "name": "model.layers.21.self_attn.o_proj.weight",
1336
+ "shape": [
1337
+ 2048,
1338
+ 2048
1339
+ ],
1340
+ "dtype": "float16",
1341
+ "format": "f32-to-bf16",
1342
+ "nbytes": 8388608,
1343
+ "byteOffset": 10485760
1344
+ },
1345
+ {
1346
+ "name": "model.layers.3.input_layernorm.weight",
1347
+ "shape": [
1348
+ 2048
1349
+ ],
1350
+ "dtype": "float16",
1351
+ "format": "f32-to-bf16",
1352
+ "nbytes": 4096,
1353
+ "byteOffset": 18874368
1354
+ },
1355
+ {
1356
+ "name": "model.layers.3.post_attention_layernorm.weight",
1357
+ "shape": [
1358
+ 2048
1359
+ ],
1360
+ "dtype": "float16",
1361
+ "format": "f32-to-bf16",
1362
+ "nbytes": 4096,
1363
+ "byteOffset": 18878464
1364
+ },
1365
+ {
1366
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
1367
+ "shape": [
1368
+ 2560,
1369
+ 2048
1370
+ ],
1371
+ "dtype": "float16",
1372
+ "format": "f32-to-bf16",
1373
+ "nbytes": 10485760,
1374
+ "byteOffset": 18882560
1375
+ }
1376
+ ],
1377
+ "md5sum": "be1adb2bbb2b85b38ad3811d55eee72a"
1378
+ },
1379
+ {
1380
+ "dataPath": "params_shard_42.bin",
1381
+ "format": "raw-shard",
1382
+ "nbytes": 46137344,
1383
+ "records": [
1384
+ {
1385
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
1386
+ "shape": [
1387
+ 11264,
1388
+ 2048
1389
+ ],
1390
+ "dtype": "float16",
1391
+ "format": "f32-to-bf16",
1392
+ "nbytes": 46137344,
1393
+ "byteOffset": 0
1394
+ }
1395
+ ],
1396
+ "md5sum": "2a986af683615d0b0b832ccdd1c9d3d0"
1397
+ },
1398
+ {
1399
+ "dataPath": "params_shard_43.bin",
1400
+ "format": "raw-shard",
1401
+ "nbytes": 31465472,
1402
+ "records": [
1403
+ {
1404
+ "name": "model.layers.3.self_attn.o_proj.weight",
1405
+ "shape": [
1406
+ 2048,
1407
+ 2048
1408
+ ],
1409
+ "dtype": "float16",
1410
+ "format": "f32-to-bf16",
1411
+ "nbytes": 8388608,
1412
+ "byteOffset": 0
1413
+ },
1414
+ {
1415
+ "name": "model.layers.4.input_layernorm.weight",
1416
+ "shape": [
1417
+ 2048
1418
+ ],
1419
+ "dtype": "float16",
1420
+ "format": "f32-to-bf16",
1421
+ "nbytes": 4096,
1422
+ "byteOffset": 8388608
1423
+ },
1424
+ {
1425
+ "name": "model.layers.4.mlp.down_proj.weight",
1426
+ "shape": [
1427
+ 2048,
1428
+ 5632
1429
+ ],
1430
+ "dtype": "float16",
1431
+ "format": "f32-to-bf16",
1432
+ "nbytes": 23068672,
1433
+ "byteOffset": 8392704
1434
+ },
1435
+ {
1436
+ "name": "model.layers.4.post_attention_layernorm.weight",
1437
+ "shape": [
1438
+ 2048
1439
+ ],
1440
+ "dtype": "float16",
1441
+ "format": "f32-to-bf16",
1442
+ "nbytes": 4096,
1443
+ "byteOffset": 31461376
1444
+ }
1445
+ ],
1446
+ "md5sum": "0630623a24b0ed117b9d082a40400a74"
1447
+ },
1448
+ {
1449
+ "dataPath": "params_shard_44.bin",
1450
+ "format": "raw-shard",
1451
+ "nbytes": 23068672,
1452
+ "records": [
1453
+ {
1454
+ "name": "model.layers.5.mlp.down_proj.weight",
1455
+ "shape": [
1456
+ 2048,
1457
+ 5632
1458
+ ],
1459
+ "dtype": "float16",
1460
+ "format": "f32-to-bf16",
1461
+ "nbytes": 23068672,
1462
+ "byteOffset": 0
1463
+ }
1464
+ ],
1465
+ "md5sum": "8cb5908d2c193dbb86a4af843ddbf324"
1466
+ },
1467
+ {
1468
+ "dataPath": "params_shard_45.bin",
1469
+ "format": "raw-shard",
1470
+ "nbytes": 46137344,
1471
+ "records": [
1472
+ {
1473
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
1474
+ "shape": [
1475
+ 11264,
1476
+ 2048
1477
+ ],
1478
+ "dtype": "float16",
1479
+ "format": "f32-to-bf16",
1480
+ "nbytes": 46137344,
1481
+ "byteOffset": 0
1482
+ }
1483
+ ],
1484
+ "md5sum": "f181953e69d8fd705c4e4f10ccf3001f"
1485
+ },
1486
+ {
1487
+ "dataPath": "params_shard_46.bin",
1488
+ "format": "raw-shard",
1489
+ "nbytes": 29368320,
1490
+ "records": [
1491
+ {
1492
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
1493
+ "shape": [
1494
+ 2560,
1495
+ 2048
1496
+ ],
1497
+ "dtype": "float16",
1498
+ "format": "f32-to-bf16",
1499
+ "nbytes": 10485760,
1500
+ "byteOffset": 0
1501
+ },
1502
+ {
1503
+ "name": "model.layers.4.self_attn.o_proj.weight",
1504
+ "shape": [
1505
+ 2048,
1506
+ 2048
1507
+ ],
1508
+ "dtype": "float16",
1509
+ "format": "f32-to-bf16",
1510
+ "nbytes": 8388608,
1511
+ "byteOffset": 10485760
1512
+ },
1513
+ {
1514
+ "name": "model.layers.5.input_layernorm.weight",
1515
+ "shape": [
1516
+ 2048
1517
+ ],
1518
+ "dtype": "float16",
1519
+ "format": "f32-to-bf16",
1520
+ "nbytes": 4096,
1521
+ "byteOffset": 18874368
1522
+ },
1523
+ {
1524
+ "name": "model.layers.5.post_attention_layernorm.weight",
1525
+ "shape": [
1526
+ 2048
1527
+ ],
1528
+ "dtype": "float16",
1529
+ "format": "f32-to-bf16",
1530
+ "nbytes": 4096,
1531
+ "byteOffset": 18878464
1532
+ },
1533
+ {
1534
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
1535
+ "shape": [
1536
+ 2560,
1537
+ 2048
1538
+ ],
1539
+ "dtype": "float16",
1540
+ "format": "f32-to-bf16",
1541
+ "nbytes": 10485760,
1542
+ "byteOffset": 18882560
1543
+ }
1544
+ ],
1545
+ "md5sum": "42d390f90d09a8f6b87cee627a11dde9"
1546
+ },
1547
+ {
1548
+ "dataPath": "params_shard_47.bin",
1549
+ "format": "raw-shard",
1550
+ "nbytes": 46137344,
1551
+ "records": [
1552
+ {
1553
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
1554
+ "shape": [
1555
+ 11264,
1556
+ 2048
1557
+ ],
1558
+ "dtype": "float16",
1559
+ "format": "f32-to-bf16",
1560
+ "nbytes": 46137344,
1561
+ "byteOffset": 0
1562
+ }
1563
+ ],
1564
+ "md5sum": "e96c76d0d8a633cb1021918c02edb71d"
1565
+ },
1566
+ {
1567
+ "dataPath": "params_shard_48.bin",
1568
+ "format": "raw-shard",
1569
+ "nbytes": 31465472,
1570
+ "records": [
1571
+ {
1572
+ "name": "model.layers.5.self_attn.o_proj.weight",
1573
+ "shape": [
1574
+ 2048,
1575
+ 2048
1576
+ ],
1577
+ "dtype": "float16",
1578
+ "format": "f32-to-bf16",
1579
+ "nbytes": 8388608,
1580
+ "byteOffset": 0
1581
+ },
1582
+ {
1583
+ "name": "model.layers.6.input_layernorm.weight",
1584
+ "shape": [
1585
+ 2048
1586
+ ],
1587
+ "dtype": "float16",
1588
+ "format": "f32-to-bf16",
1589
+ "nbytes": 4096,
1590
+ "byteOffset": 8388608
1591
+ },
1592
+ {
1593
+ "name": "model.layers.6.mlp.down_proj.weight",
1594
+ "shape": [
1595
+ 2048,
1596
+ 5632
1597
+ ],
1598
+ "dtype": "float16",
1599
+ "format": "f32-to-bf16",
1600
+ "nbytes": 23068672,
1601
+ "byteOffset": 8392704
1602
+ },
1603
+ {
1604
+ "name": "model.layers.6.post_attention_layernorm.weight",
1605
+ "shape": [
1606
+ 2048
1607
+ ],
1608
+ "dtype": "float16",
1609
+ "format": "f32-to-bf16",
1610
+ "nbytes": 4096,
1611
+ "byteOffset": 31461376
1612
+ }
1613
+ ],
1614
+ "md5sum": "b14c43e394cb826b709dd5a5ed0edf0a"
1615
+ },
1616
+ {
1617
+ "dataPath": "params_shard_49.bin",
1618
+ "format": "raw-shard",
1619
+ "nbytes": 23068672,
1620
+ "records": [
1621
+ {
1622
+ "name": "model.layers.7.mlp.down_proj.weight",
1623
+ "shape": [
1624
+ 2048,
1625
+ 5632
1626
+ ],
1627
+ "dtype": "float16",
1628
+ "format": "f32-to-bf16",
1629
+ "nbytes": 23068672,
1630
+ "byteOffset": 0
1631
+ }
1632
+ ],
1633
+ "md5sum": "9a49c18e3ea67739ec061eba259675ec"
1634
+ },
1635
+ {
1636
+ "dataPath": "params_shard_50.bin",
1637
+ "format": "raw-shard",
1638
+ "nbytes": 46137344,
1639
+ "records": [
1640
+ {
1641
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
1642
+ "shape": [
1643
+ 11264,
1644
+ 2048
1645
+ ],
1646
+ "dtype": "float16",
1647
+ "format": "f32-to-bf16",
1648
+ "nbytes": 46137344,
1649
+ "byteOffset": 0
1650
+ }
1651
+ ],
1652
+ "md5sum": "fb02a48b192522544f1ecdbedd6c504e"
1653
+ },
1654
+ {
1655
+ "dataPath": "params_shard_51.bin",
1656
+ "format": "raw-shard",
1657
+ "nbytes": 29368320,
1658
+ "records": [
1659
+ {
1660
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
1661
+ "shape": [
1662
+ 2560,
1663
+ 2048
1664
+ ],
1665
+ "dtype": "float16",
1666
+ "format": "f32-to-bf16",
1667
+ "nbytes": 10485760,
1668
+ "byteOffset": 0
1669
+ },
1670
+ {
1671
+ "name": "model.layers.6.self_attn.o_proj.weight",
1672
+ "shape": [
1673
+ 2048,
1674
+ 2048
1675
+ ],
1676
+ "dtype": "float16",
1677
+ "format": "f32-to-bf16",
1678
+ "nbytes": 8388608,
1679
+ "byteOffset": 10485760
1680
+ },
1681
+ {
1682
+ "name": "model.layers.7.input_layernorm.weight",
1683
+ "shape": [
1684
+ 2048
1685
+ ],
1686
+ "dtype": "float16",
1687
+ "format": "f32-to-bf16",
1688
+ "nbytes": 4096,
1689
+ "byteOffset": 18874368
1690
+ },
1691
+ {
1692
+ "name": "model.layers.7.post_attention_layernorm.weight",
1693
+ "shape": [
1694
+ 2048
1695
+ ],
1696
+ "dtype": "float16",
1697
+ "format": "f32-to-bf16",
1698
+ "nbytes": 4096,
1699
+ "byteOffset": 18878464
1700
+ },
1701
+ {
1702
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
1703
+ "shape": [
1704
+ 2560,
1705
+ 2048
1706
+ ],
1707
+ "dtype": "float16",
1708
+ "format": "f32-to-bf16",
1709
+ "nbytes": 10485760,
1710
+ "byteOffset": 18882560
1711
+ }
1712
+ ],
1713
+ "md5sum": "354728bb842783df3121b3dd3c74be00"
1714
+ },
1715
+ {
1716
+ "dataPath": "params_shard_52.bin",
1717
+ "format": "raw-shard",
1718
+ "nbytes": 46137344,
1719
+ "records": [
1720
+ {
1721
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
1722
+ "shape": [
1723
+ 11264,
1724
+ 2048
1725
+ ],
1726
+ "dtype": "float16",
1727
+ "format": "f32-to-bf16",
1728
+ "nbytes": 46137344,
1729
+ "byteOffset": 0
1730
+ }
1731
+ ],
1732
+ "md5sum": "9b3b2574f33b956f3d048433cc527dfc"
1733
+ },
1734
+ {
1735
+ "dataPath": "params_shard_53.bin",
1736
+ "format": "raw-shard",
1737
+ "nbytes": 31465472,
1738
+ "records": [
1739
+ {
1740
+ "name": "model.layers.7.self_attn.o_proj.weight",
1741
+ "shape": [
1742
+ 2048,
1743
+ 2048
1744
+ ],
1745
+ "dtype": "float16",
1746
+ "format": "f32-to-bf16",
1747
+ "nbytes": 8388608,
1748
+ "byteOffset": 0
1749
+ },
1750
+ {
1751
+ "name": "model.layers.8.input_layernorm.weight",
1752
+ "shape": [
1753
+ 2048
1754
+ ],
1755
+ "dtype": "float16",
1756
+ "format": "f32-to-bf16",
1757
+ "nbytes": 4096,
1758
+ "byteOffset": 8388608
1759
+ },
1760
+ {
1761
+ "name": "model.layers.8.mlp.down_proj.weight",
1762
+ "shape": [
1763
+ 2048,
1764
+ 5632
1765
+ ],
1766
+ "dtype": "float16",
1767
+ "format": "f32-to-bf16",
1768
+ "nbytes": 23068672,
1769
+ "byteOffset": 8392704
1770
+ },
1771
+ {
1772
+ "name": "model.layers.8.post_attention_layernorm.weight",
1773
+ "shape": [
1774
+ 2048
1775
+ ],
1776
+ "dtype": "float16",
1777
+ "format": "f32-to-bf16",
1778
+ "nbytes": 4096,
1779
+ "byteOffset": 31461376
1780
+ }
1781
+ ],
1782
+ "md5sum": "d7766f4b6cf1ec4a102d3a8b1a512ab5"
1783
+ },
1784
+ {
1785
+ "dataPath": "params_shard_54.bin",
1786
+ "format": "raw-shard",
1787
+ "nbytes": 23068672,
1788
+ "records": [
1789
+ {
1790
+ "name": "model.layers.9.mlp.down_proj.weight",
1791
+ "shape": [
1792
+ 2048,
1793
+ 5632
1794
+ ],
1795
+ "dtype": "float16",
1796
+ "format": "f32-to-bf16",
1797
+ "nbytes": 23068672,
1798
+ "byteOffset": 0
1799
+ }
1800
+ ],
1801
+ "md5sum": "d8486986a2147c55770851dd04cf64bc"
1802
+ },
1803
+ {
1804
+ "dataPath": "params_shard_55.bin",
1805
+ "format": "raw-shard",
1806
+ "nbytes": 46137344,
1807
+ "records": [
1808
+ {
1809
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
1810
+ "shape": [
1811
+ 11264,
1812
+ 2048
1813
+ ],
1814
+ "dtype": "float16",
1815
+ "format": "f32-to-bf16",
1816
+ "nbytes": 46137344,
1817
+ "byteOffset": 0
1818
+ }
1819
+ ],
1820
+ "md5sum": "f0cbb90d4428b9815210b55ae2e12267"
1821
+ },
1822
+ {
1823
+ "dataPath": "params_shard_56.bin",
1824
+ "format": "raw-shard",
1825
+ "nbytes": 29368320,
1826
+ "records": [
1827
+ {
1828
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
1829
+ "shape": [
1830
+ 2560,
1831
+ 2048
1832
+ ],
1833
+ "dtype": "float16",
1834
+ "format": "f32-to-bf16",
1835
+ "nbytes": 10485760,
1836
+ "byteOffset": 0
1837
+ },
1838
+ {
1839
+ "name": "model.layers.8.self_attn.o_proj.weight",
1840
+ "shape": [
1841
+ 2048,
1842
+ 2048
1843
+ ],
1844
+ "dtype": "float16",
1845
+ "format": "f32-to-bf16",
1846
+ "nbytes": 8388608,
1847
+ "byteOffset": 10485760
1848
+ },
1849
+ {
1850
+ "name": "model.layers.9.input_layernorm.weight",
1851
+ "shape": [
1852
+ 2048
1853
+ ],
1854
+ "dtype": "float16",
1855
+ "format": "f32-to-bf16",
1856
+ "nbytes": 4096,
1857
+ "byteOffset": 18874368
1858
+ },
1859
+ {
1860
+ "name": "model.layers.9.post_attention_layernorm.weight",
1861
+ "shape": [
1862
+ 2048
1863
+ ],
1864
+ "dtype": "float16",
1865
+ "format": "f32-to-bf16",
1866
+ "nbytes": 4096,
1867
+ "byteOffset": 18878464
1868
+ },
1869
+ {
1870
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
1871
+ "shape": [
1872
+ 2560,
1873
+ 2048
1874
+ ],
1875
+ "dtype": "float16",
1876
+ "format": "f32-to-bf16",
1877
+ "nbytes": 10485760,
1878
+ "byteOffset": 18882560
1879
+ }
1880
+ ],
1881
+ "md5sum": "551093d9636d08c36c4c4686750acd3d"
1882
+ },
1883
+ {
1884
+ "dataPath": "params_shard_57.bin",
1885
+ "format": "raw-shard",
1886
+ "nbytes": 8392704,
1887
+ "records": [
1888
+ {
1889
+ "name": "model.layers.9.self_attn.o_proj.weight",
1890
+ "shape": [
1891
+ 2048,
1892
+ 2048
1893
+ ],
1894
+ "dtype": "float16",
1895
+ "format": "f32-to-bf16",
1896
+ "nbytes": 8388608,
1897
+ "byteOffset": 0
1898
+ },
1899
+ {
1900
+ "name": "model.norm.weight",
1901
+ "shape": [
1902
+ 2048
1903
+ ],
1904
+ "dtype": "float16",
1905
+ "format": "f32-to-bf16",
1906
+ "nbytes": 4096,
1907
+ "byteOffset": 8388608
1908
+ }
1909
+ ],
1910
+ "md5sum": "67f5257abaa2b15cd8d1ad64db9b66fb"
1911
+ }
1912
+ ]
1913
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40bb78a2f956c07f89c7b222b3d4a5fef36b95bb75f430545016037832d749a4
3
+ size 131072000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a111cb9b6418f368b8905ab91eac077732d6ce2a54516acef6b9d060b921e23
3
+ size 131072000
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020f50c9099c72e6cbc9e29e1b433618e7be8d3ae676406f940e95aa08e3abd7
3
+ size 46137344
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db9a3edf6a7d551b6b0b4509b0d81a0463e968d62b925bc110d89cd27be1a4b4
3
+ size 29368320
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ccff959998f0cd563ad0c1d8b7f50638c35f3c637f2cfb9d852e823f94d6e26
3
+ size 46137344
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae491f096afafce65dc6074c3f2b5a0d08708cebcc8541290cee7cd6c3f78b51
3
+ size 31465472
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2f0f263ad112a4442abf77c937fa804bfec28762cbde5978eff115bda4f3cbe
3
+ size 23068672
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ed89a02ad7e8c8236a1e2c736c8532b7e6dbcd265dd1f9105e5d778be6b408
3
+ size 46137344
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c83ef8d58698af39b15a16abf7695cc97a03ae14ad55854ca1b286c0110a301b
3
+ size 29368320
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e37d6540a65c7dacec2b4ce67633f6c7ed91836b22eb2b6ae76599ba00b25a9
3
+ size 46137344
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed4fdbd2065a9a441f18fd49cad9e4574b0ff68d8470f1dc6f4883b012eb05d2
3
+ size 31465472
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:675a9f9ec75a425220b60d6d5e61b2494baa59369adb552e2a10c253e6ef00f2
3
+ size 23068672
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a0905ed98c87335db09a5be64892634dcccab88f80e96559446c2b5dd98d00d
3
+ size 46137344
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2114c727fd7820c5bbba754e41a767c06c6356a25004350c1b1abcbd5a2b6fe1
3
+ size 46137344
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41cf7031f92fa64eeaef78f1424f06bf1d3c41f79b30ac774d42f25fb1e948ee
3
+ size 29368320
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a8428a8e6ca7de4a2d83a8bba4726a41d7ac1500b4da977fb369b8fc294ca4c
3
+ size 46137344
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00aa64e12ec020678d462c711a045d8e506637d2c968d6a8b8080a614706fed7
3
+ size 31465472
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c4544e52c352a83e89a83cfe72dd22055569f934c1c9f42b185ca5ac9f55bd
3
+ size 23068672
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff378f53234b4133e3cf27db35846206b46695d34476639c4338af6d95046b61
3
+ size 46137344
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0303364f9058b7059f31d2414f180fdea5e64c830ca167deb4380ba4a9fde4be
3
+ size 29368320
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:680913d9ee32d484dbfce6cc6c828e5ef9a4539f56a754dc0cf61a1251619364
3
+ size 46137344
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2837196ceb01afb04f48f08eabaa73a9c91b263d5a56a1984f87d455f3a0e6d2
3
+ size 31465472
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe339e957b311b9d65ea475e98241d2fc876f46be639fb57a007abf892aa485
3
+ size 23068672
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30e8cc52e3bced6491a2069b4caa0c323a681c478d025bba9f0e4e9c18307a28
3
+ size 23076864
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8cde431a5a325c8c836bc725b1e03290b4b2b3392c1e05d9265d60ab5dc0827
3
+ size 46137344
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:587659bd28cadbb1aa013075d9dc63fac39986da692338ffbd4cc34194877e14
3
+ size 29368320
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5efc5884ac16737ee562635eb2fd5017ac9fa08f70b5570c6a3c90efab05645d
3
+ size 46137344
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3050b22d0808793007ff2e131ad6a9feae1a77d0a078df9aa881d685ccd341aa
3
+ size 31465472
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:880fd67b64c897b548c59a533446e17c87fdfb393e0491b7da659f282a8eb1af
3
+ size 23068672
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab077c6e52b3f1af4bbb9325d90a3f0016cebcbe4d5c0785691b43fd2cf5a59
3
+ size 46137344
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d902a8da8c04e18386a4d9bee30c0377af9d8ae83292bd687f1830b5412621a
3
+ size 29368320
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a72e03e5ab337070d2216a3465ff1faad6b78025ff29cde7fa23ee5574eb13bb
3
+ size 46137344
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9bdb2a535a1dd8a0389d3a321c5a00fe697311a389b0627e60acddd7366524
3
+ size 31465472
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5361778fb63eaf705548c9d898e81833ab045b5c16387e230ad895f00a6c7770
3
+ size 23068672
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7371bc0aee90c3e6b842680144853e81acaf338051574d3a81ebb0ccca657b2b
3
+ size 23068672
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:316436340904a86ef38924eff36ac78889f44e5378e88cd7dba61a5b7c4d3565
3
+ size 46137344
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab6d607609a711c6fc45e327e00e25457893b43b53750cdb97e1e3c9299c4c3f
3
+ size 29368320
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0262d846d32bf43003e22c55f567b9ecf057cc74f1f620aaf50e6837e36d3fd
3
+ size 46137344
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f5bca30386ca323a5ecba7204cd995ddf5454a7383cd1d1ca125b3499aaac2
3
+ size 31465472
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94b51ac2c4c1cf091983061408391afe41e3c48501c9305bcfe806fbe68690d9
3
+ size 23068672
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adde6e2c0ed2dce98c3f8f5500d7badeedf5e59860859d175ad7e5ea406390b3
3
+ size 46137344
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09bb6bfe2fbe9404e752b6a1883bd5978f08c07d9155806646e5937f55f8cadf
3
+ size 29368320
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7fe0a95e0ee70122aa303b8f122ea8430647e8ec3fc57eab0cb0ae782fc494
3
+ size 46137344
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6f94878e58f18236a18b3a034262abf934f1657be235aafd951fd37284df5a8
3
+ size 31465472
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b20e65f65c33dc3884b6c1e7efc45009941851dd933b8b210adb6a4db04df07d
3
+ size 23068672
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5dd1b6463c34d900410225ef11b4b0602ea9ae8fb65bd77b7a20b0dcbd0c279
3
+ size 46137344
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50127395351aa3645fb28ec6eda55b57a6572798638a7db42dd3f02d114a41ac
3
+ size 46137344