CharlieFRuan's picture
Initial commit
80f5e6e verified
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 7322112000.0,
"BitsPerParam": 4.500366415925148
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 81960960,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32016,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 81960960,
"byteOffset": 0
}
],
"md5sum": "4073142467b271eec22b4addf520a64f"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "55aec6357bd7d981feaf1aec684b37d3"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "53658997652208659ed1e8d7738b17a4"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "89147837e11714f1a7a5f042134993fa"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b66f3593a0ab7a24076aac8b51fd84b4"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 32896000,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32016,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10245120,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 10245120
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 10255360
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14679040
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14689280
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14699520
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19123200
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 27970560
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 27980800
}
],
"md5sum": "313ed96fa0680cef4277431c5c0baffa"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0ad3d00c200a15b202948aaabe90f86c"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "31267155843a91052a2cd68dec73897e"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "821dbf29f33052983f36bd7932f4938e"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "3c9000d3a65dc87c25c18da67261997b"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "40d87756c164ebef5b20c650f2bb68f6"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "892a5d0f31b9f10a8974d6f7ee90a64d"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0ee788eb4abc58765fd422de8ff11ca9"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "7e3d4452df1f3c3913e0b877cfc81ec2"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "15fea230b919ec30a2ca940676d41cc1"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "be9fbf92261a7c49b83d621c9a25f708"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "5a900bc0a7518eacfaf79cb684c0615a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "274409adce1b127084db601566bf34e1"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1411dc4c4bde5f946e63e7745c4c7036"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e3cdd59b5dce839f054490e0e40a01f0"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "3325d6b810c582de185aeebcce4ce2d1"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "876df54c28a915fe05b491b56e851e9c"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "703ba8287633ac35ae6b9e9ec3f4087d"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5323f632af4dd998b3a6980cc9b5e72e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "38acc2be04854c0652f26e23c3a3254f"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "3911cc48854a956c9842013e78ae5a08"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7e4fe6d3248487ee882bcd334eb7af02"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "828b748199499aab69f922c8b1daf1a0"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "6a5baf3782ae14f9082224e0ef038b4f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "195d0eb1cdca3d4b93ba9742a1fa3f52"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e6aa0dbd51d5ccb4c8e523b3e2449f1e"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e8a63c1bb3cfb2b12463c3ca5f09f183"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "87671b485912b4657f287a17bae10e7d"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "15674dc14df817f675da0c42f4a5c5cf"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3fe06ee3c84565e4c8768a94a738dd4e"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2e79309940ca8a5eb4743679662aa215"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "1b163d9d70f4cbf25ac996f46642b197"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "8f6b777b27afaeffe2b673808acb26b0"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 81960960,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32016,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 81960960,
"byteOffset": 0
}
],
"md5sum": "55f56d15be3019aebd31203d7f62b7d3"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3141a0208888522b101b88a8ec82b41f"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7dfac72ef3bed4954e8ff23cb3b84aa2"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29434880,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32016,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10245120,
"byteOffset": 14755840
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25000960
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 25011200
}
],
"md5sum": "ba4a3e616ae089d3363ca5fb9268650d"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "60f539a3d046485efbfe53f3d8cbca68"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "34d5e73ce93d1bc63dd969d246e8e532"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "647c4b419e2de3dc405ef8f04185b6a0"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "929fe70a4a97ae0f81a2b7a7b3fe9597"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "82eb7fe3da250a882c1fc6372809d9f4"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8223955e527848968b6ad6692a9ea9f4"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a262db45d4e5da6f043624211b2d66a3"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "b83deada1c29276bfd99f9018c41a076"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "caf59e693913ecddb7ca360c1de8adff"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bd64c9d45500e55617df0344f8aa05ad"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "55b3ebb4bbbcaa04e775ed1e148e937e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "3d540661bc496fce1e687e34387aadbc"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "858955b4b4b93f1d1c7132e10bf39319"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bf36b74df3887777c15fd50f257d23f1"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "737a459e9093ff85756c3e411c63fe4c"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "6f26b8f90da4df7eed8fbddbc1e0e60b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c9b8009bb7504dbdd59e0450ba5c5fdc"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2bf4276ec32b69570defba35f0aa8c47"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4fdb41e22024770552fac89fe1d60686"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "9a8e1e4a61ae23e833cd90719c0ddc48"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ceceef2ec44df087af87bc475c3453ee"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0fa67c7bc6e2372b08f7d89fbd84640c"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "eddacbcb02987630c61df7f0c5064aa2"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "3ff048d135cf3d25110a5e3adb6931b7"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "12911462a0ca3be9e2f42dc6b712fef5"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "dbb926cf39ea6b4a514ad6dca46a8915"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "902812e3738a645a11cf41032134513f"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33443840,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 28518400
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 33433600
}
],
"md5sum": "aeefb333c09c319a7630e3460391597d"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6dc7ac4beed2f93db148306867ef31d7"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "9848bc2351544bd4e7d46006447efb85"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7cb3f81fe4e4577f3b9a6da77db92645"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "db6a87a6bc6b4c6afd62200705f9ae8e"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f06b2db535925e243394152f096d4d07"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "542c1c85f1b16ed34f2876c470b96d8d"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "56fc0351d5c046d3f34078a14a063b99"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "5924bf2771a469f8fc41bedf9cbcb9f6"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f2cfa3e2ca1674fbb1115c36ceb8d312"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "825a072c2de7022335856041152e47da"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3107a382c4d0ddc8fd26b8cbd49bb582"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "4e3487218ed472e5c5b1e6f9944f16c5"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3dc170eaed937154010399eb644fb096"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "dcef90a792e01364433ad3acc9616165"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "fe863f81d5d29228ef068847fab67c9d"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "2cac949178271bc758b959d02b66d28e"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e6f9184e7501220360b12fed91e9c41c"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "84bfbf4c88924c7cb700b9fc42b07a8a"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "35042b4650e5cf02f7f5bdbe29b6eec6"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "ccef4c858bf6aa10298553821883daf4"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3893165fb517bf35df3af88d14b95c0c"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "1bc7d99bcba46697896f0f28849fb1cb"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "eeb1137c6b62ce8a7ec05e8457d65b1b"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "f14fc51e9e702edad526da691a8903f1"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "6bba09e090e58aee56a39cef90d56eb8"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "217d59720e3a5f97d693b20671d6272d"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "ba126abc9eacb51d7af6a5a6f5a1bc5e"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "e3cbf9857a3ba9d0159e067e12f36fdf"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3f45513abcedc6da8a7b77ab38a05465"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "04e9df5c42d3ee2171293f6dc6bf0d16"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3944c4df74036701d5fed9bf75508119"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "efde65ddd16c6b3c32e2987e67e6a1a1"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "97daf35cd859e2bbbe963ea3f6b990d1"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7fad816a930bbc7b9ac7327d02f12741"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "a583de836068776c1697df60947a2d3b"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 32460800,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13281280
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26388480
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28037120
}
],
"md5sum": "35eb6addc99689f98a89c12b0641342c"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "36339e51807fd088bac24fd14e015a4b"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "123d06b2ec3dae7694be30e19d519281"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "402575be75c1388bb223a31d9d688a30"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "e9d49584dae276717620e03de00135ce"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "db034f359d250af29630f99ba04ad01e"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "ee8eddef397931df15aa024b6c1abc86"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ad5f0dd29b78c04692ce2acba45ad490"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "3f59f0ff76cabb1a6109f6ac6d1c4da0"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "14ab7735130acfa139e651af21fa89d1"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "49a5974bde08d406ead78bced24778cb"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "29e71e7bef5340b9462fb5201fb2e437"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "09288dcae3c0f46aa7c2d86386c80d76"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c2a8d008c7ca9fda787ab785e5e098bc"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2c7129c84f955c0323ab280732f16d34"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0a5ecdec73798cabc16490559142ef51"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "053cdc9278198e855d9571cb88e6c39a"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "9de408494717b1e591d1ffacc65256d3"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "73a26c3f1cbb1b876e3f2c37c7ede188"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d05191a9c608814746b01e3bebd2f124"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "9d981af6f26cdb4eb302639861185606"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d04bdb5408fa2cf08e28f22f6e7f9ba8"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0596f459d0492364c40e603af9d73c6a"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c442dab0695ea5f54608ee947d631381"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "ba0fbe418a8784564fa542826438b55f"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d94fd94761e30d8aa51937e8bae8a27f"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a1f0a3af4d3a8a5db90a723f99e9f2d3"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "998b309d939d36e68bd4c59485336668"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "dc392fb61dfc3251cd3fc5bd92a38cc0"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "fa60bce875e75e76ab5d478be33ab41e"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2fd864cc688380f20a13f0baac8aee66"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "80d5ef8012bf9aa03c2a13efed93d464"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "9cf86e28c09c91012615c3ebc65872b7"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a8b9e52b1f7cd93e9377707684f3503f"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "69964ad2cc2032a1d0087b60122c6e88"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "bc33011e9b2f09bf3ab29c76b76af1ab"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "80b0bbc992956aa91e4df4532dbc79be"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b6b9a5e0c8abdbd9ade6f4818b12fc2d"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d23f79a79cbdb55897d2620e7b215c23"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0334d968748820f0678e59831670d78f"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "a459a87c6718d47a08de182d4d6ff981"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "bb7f19ddceb1bf15cc9040e64dea96bc"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "86ad1612e5c96cbb63f4d1e306293cd9"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ea166dec252f0bb78427865fbb5f3524"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "62ad3e5fdb635150e0ad1bbfec8ee153"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "8d894151163fd78ea7e5a33bcd9a798e"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "6a9bbab246fdb4d4e21db9c32ed75e9c"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2f8d0b4527a002c156e7a16ec1662ef7"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "697ec901a67d74cdc3fad331bf1962d3"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "cf32fbdae012109a3f3bc1547fc08556"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "24d0e3e9d1b0cf7684d80ad1900c60dc"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e073dcced17ea63515b62d9fe4b3b367"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "513713ec479a0ce3e2dab095ccbe5b84"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "7d0cc88ee8b5f7cd06cbe992f96e8b51"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ff6ac82ce082113d93978fdeaa99c67c"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 28518400,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26880000
}
],
"md5sum": "69a9d21b97d30ced94ea1d1967f3b58d"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "f5a623e66dd0f9657067e36de80a7e46"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 28508160,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4915200,
"byteOffset": 8847360
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13762560
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 26869760
}
],
"md5sum": "0c2fa2268630b1145b6a2b7026f2a980"
}
]
}