diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,13367 @@ +{ + "metadata": { + "ParamSize": 885, + "ParamBytes": 40900313088.0, + "BitsPerParam": 4.157646319274502 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 622854144, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 622854144, + "byteOffset": 0 + } + ], + "md5sum": "30d11db2a2a2b452819587f617348bcd" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 77856768, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 77856768, + "byteOffset": 0 + } + ], + "md5sum": "c7a720281767380e82eb9d636961af88" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "efc5a4d6023963b0fdb2485f51a85ded" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "3d2a3305fc86e14409b36e5edf37c57d" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ef9912189d9ecc6f5e05749b49757900" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 622854144, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 622854144, + "byteOffset": 0 + } + ], + "md5sum": "b489f8b3baf75bb5af31637e9460ab85" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 77856768, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 77856768, + "byteOffset": 0 + } + ], + "md5sum": "f2705d64315fea4497558a806b184947" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a0214adfd92df30d0f5b7e20458db331" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f82bb1d7e89f9cee883d1022d3354b20" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c5111f7bdb9f282f897cad96217d13f9" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3423f3092126e3f49b2fad8f5ee196c9" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8de43e002d030d0cd0f471b01b156dce" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 24645632, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15171584 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15187968 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15208448 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20451328 + } + ], + "md5sum": "844afa4797a72df862cd3bf83c1199e6" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "030fdb728f273b45d636321fb9aaff7f" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d9d77b9848b3478f2fb1ca639f4266e9" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6075ae8f3235d1898d3d45c6f67e1413" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "83b060a186660ee4f8c842c34ee135be" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30330880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 15155200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30294016 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 30310400 + } + ], + "md5sum": "0e91848b211902909b21a39b87b9b7eb" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8d91e11124a4dd2aaca5e627e915f9a5" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "245c688958413f0a54057302cc0b05f4" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d63f562b0b7e7cae3027be1b7960c492" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8781f9719c19175aa1b95811a80826ff" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f611656bcdf6a47d7554b5c578bfdc85" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4e7e6677598bbd333cdbaf899bfcd8d2" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29872128, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 9453568 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24592384 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24608768 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24629248 + } + ], + "md5sum": "5f544120d594da33c125049771645c1a" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "98bc536aa3167ed2ea1414677a4cee3d" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "43c1fd4f0358427832957b80b2245486" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5071992be4c8754b638d6c37c39e7244" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2738d846345f44094d413981387d9d96" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "88207d909c7f0c45143c7d2259bb58c8" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f456ad4b3b5b515903128c7ce17c834f" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 28856320, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4210688 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4227072 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 19382272 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 19402752 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24645632 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28839936 + } + ], + "md5sum": "2b6d0364cf9fbaba4dd0a3b54915afa2" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "34d98dae2d91d0f201f4a6eea78122a2" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "aaf310185848cf68b25595307efd8997" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "402b9f296a68811eb0e4fae98f277df1" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "38d66330920856f317826ee2347f148c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f5ac7b09a35be7f9534fa7afbe58db7a" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "8aacce4cab3803b63c98ade600b4beab" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4d6515b3113c6042cba22627fa959e0a" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7f9e6f94a0c96cc6e8c940c566c32399" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e82712eb1bf3239ef14a337bd069188a" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "fba44e498a5e0e1144238e03125a1237" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 30314496, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 15138816 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30277632 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 30294016 + } + ], + "md5sum": "7529fc653fecc329d10e38579ddfe5b8" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "00baf43388df1406d07408a93f06cce1" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c2cef2f12751153b2a47a12deba2fcac" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "84316971ddb8899e9419fad78bbb6c2d" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "4b235a7df878a9cfaaebd6fc75fcc89b" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8438a1ae656e06873b006ff7960c4b64" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3bae5c64d9d54cfaed38042d25707555" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29872128, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 9453568 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24592384 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24608768 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24629248 + } + ], + "md5sum": "bb936a9263bc4cd889a8205d2ef82136" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5a0d494f8d4fa2f0fcc70e63472c9e36" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "80c8e8167bb438b76e785f31f4dca06c" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0cbc32526460f40354b8d46677d93294" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "401ec7e2142af028a26ec8b3a9f165e3" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "4f8083954c8e857bc265dbc4e3f3ac15" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3bd96159a9c6efae9d9511efd1f0f803" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 28839936, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4210688 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4227072 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4247552 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9490432 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 13684736 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28823552 + } + ], + "md5sum": "7ffaee56d9d0cf6ae1c7e28924151bf5" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "1ce6623dbc4bf31837715e3b333c1095" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "610651dd962c853d9ab359ef8a0a39d6" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d9f92b928179ff72dcaae938ea4ef802" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "875f89c1487316b63079513349019cfc" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "eb7783febd4b7c141898ddbce9744bca" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2d4a33ea7aa987f622d9e2d762958143" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e18f61c951673858645a7840499ff108" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9e2c8b2b22a8271702e9ae55e4ec4f8b" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "b0744e3e9f6e9b2d045a0caa0dfeb5f4" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "aa14d15e368ab605e23e5b6aa11362df" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "cc86ff3c0e128ed1929cb9f7e47a76e3" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "262a4059533a11bcad272b4346e27fa7" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7a79d2917ea914e09f1def43dec22d91" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c24f573172e32c76116b3fcbbb50109b" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "81ddc128beaf94f564e84cd913cb0580" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8ba398ee5890cc446c9e9b376f597c9c" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "57f18d568749adb96b9245d20b4bb280" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7e071bb211033860d7be4290cce90138" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5ae8321205f8e65c7721e3b47f026ae9" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7b439609152eef20a9ca82277a1c5856" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "19464525a73a05be284d6f9ca89acaf6" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7f17b610c41e525f84b05ddc11f64f46" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f946e97ea91bdb4c5a1a0c6811ec4416" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "f0b4efc69e6798c2e8874168e9fd5427" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "56cc7db1534b261f7780d96ae2c53387" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "98181d47f8909ed19ce2937173b4f54a" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "97dac87980011c26de523b4cb0c075ea" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "97dbadbd418c1999323e6dc6a7270db2" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "fc2ca62091095849cb5f3e94d09f4170" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "38949950288a4bdd2e1e2b15759f781d" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e3ab5ad864340e24763a653919603df1" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f25f20ba319842f0c452b611451aa800" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "11e127a7d75df1b8092df3aad5b902f1" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4dfd132e1fbbc278550c6a1b5ecc51e1" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "904593c4515208066212a6d7e4b6f251" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "f2574725be72fb83f620ad308758c325" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "926ddf187b8b0b27e45b1ad5dbfc5a68" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fbed2c0d924d0ccd9a22ea29c771337d" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "fcb888f244767c111d4c22def80ec262" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "78025a523dc9ee3f453e5728990e6a1a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "bfc87abf59244752d7d3b4e712c721e0" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "034e57bf4ac681111463915ba7c5f7b7" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e093de6ac746638ba7175ee5bde4c1d1" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c2a6ae61a86785dd8fcba08ac881cfd4" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "54ee33cea8e9e2e7ed5d81a20b36f8c2" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "eb2f2e8adfa7e85fa2467a90bdb19994" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "30620562db8dec05160b47aae3f5e257" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "399e90ac936f4c9c47fd61f3eab130e3" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "b2b90c1a787844ba51bac19d115e9f05" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b2a1142fddfd8ca3289684123f9df0b5" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8c28eda41f9f36d94e53d03e1415919c" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "131f3f4e152caaae411777ddc881613b" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1e62044df534eb72f79b772202a84226" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "27663b9869f490d906a5d22b1b914bc7" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "4cb0bc6d806fcd3b890206eb3dc0b0c9" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "96b54daec0e8b97c14c63b8289f4f6d4" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ae7abbc6a48ad27894bb81e8c193337d" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c3cd4e90a874c97c365dd97dd11f2b1d" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "35164610c99f06f005d199fe01500c87" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c916cfee05d7b1672c95c255f3ec4ad7" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "833737d4b02a96eb33f90386f146a1f2" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7830f92830e1dd4a2120609662d786ba" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "82898177a9b2e22bc4b5f05612e8794c" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b83f1bf4425630cd5bc1a700293d13d5" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "532d027278f64a7186d4c133e2e7c664" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "a20c9a7b8f507bd79560807ce2381449" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "aee6d58aa28a37a39009c4ba6f0e7158" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "def4e2c3d6283282f871f8b3b59e8bdc" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2215ec6c7d888ed2931b7256eb2ab3b4" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "83cb06497eb3ab6a3138f17b31643c60" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "92a0393013ae351eefae7c5ad6f7a48d" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f435d6c889ecf26d3b316d4681fce60d" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "23654f3673beef28c7639a44dac6c094" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9e01a60de74ad6e56eb96d00e527b130" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "272ce3fbaa0b58bf24d7d677c525a99e" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c106ba2a06c4b63763a1186299af94ee" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a514a3f1c7a71c179c31107c598434e8" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "783e730e61266f5bacb8ee075dca604d" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "44cd5b30c93ac78b123ef63a6f155ff6" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "cf29b4e30f5a112b4d03de4ce4fc5caf" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "25ef9f6b4d4d91bbeafb20f6740bab1b" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2d8d3683a6a0e4f4d07d15e0d4a6d619" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7e4dfa9604a3b299bae5cfaf71eeb980" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "a47f73ef7eb330bb76b59cb138513d5a" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c8336a31f19bfffc5ef8d946d2f62682" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b23085723859aa17a29a49f15110a478" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b8a3cd65ea3e57816d05af49e2aafda3" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d81f90038402b1eac77b3db301b0b251" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "54b0612bff3fd05576a19eb722a7a9bd" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "cd0cf913041bd3483f82f5665eae40c4" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "92abfe269c262261df474a95449317c2" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0ac202cb157112e5689fbd1144315dae" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7e9031ba2d01ca1261752a926306c873" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ffcf25dfbacc7c2a7e62fc815b6a46c1" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b0f0eb6ca00ac61c498c04055fc42c79" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "ff477a37a99fdfd82e674d933039936f" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "782ec85a495ac30c4fec0a02caa59572" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2114bb7decf488e85718256cfcf90371" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "54410ef34ab8bf71fefe4d0991940040" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "952d12d28eec59c0e1bc3d592d5a05ab" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b591049ba6b2c2a13a2399258718c45d" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e9beaf553115aceaff1927e8b2dce36a" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "e1cc5f628eb520091edd36b7ce5357f8" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c691e5d030307c0ffb2e32ffa00a09e6" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "568279e7dd385a577a9ef3d59af8a07d" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0428e7bf8c75e9436b320fe08d1e2b2c" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "071602cb25d3876b13fc6282a33201d9" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "216461f18745a660fc0d05bed914ac9d" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "dfa2a7b61d2442a47e700b93018366e8" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 28823552, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 19349504 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 19369984 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24612864 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28807168 + } + ], + "md5sum": "e075ca122e18d286e60fd35ee7dbfbb2" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "81dadd0f1299a6aa5ddc018a879d6c71" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "82aeb9e3f78ea76524d49abbb7281b10" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2525d9dfbc561e33b0de541e5ef2216d" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1d81fce7c5907d2efd677a5f6c3d248a" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b03d6b1906bef323218598b26ca6cff1" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1980f808640f2c70e141cc9f64a4b9f9" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "803cfa621172ff8da36114cba82c2058" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "69d08366c588fcf01a572f1164b50b8a" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "44930ec023995f529efd8a90a917b37c" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "81288471238a1d6ac1e83c4720d2c86c" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "fbc23343baada5a7a40729fdf6639517" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "3cd15791b38016fb4ccfeb0581837678" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bd6eb42a2d132625869e1204f00724c6" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ced0c42aaadc6a9b4c0e47de2e65f4c2" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ee9ab82f1f9efef8f5a497998dfe6057" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5b8f662012475930fcaaecd2859e119b" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ddacc989494a7896848e7cf2fadd3883" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ba21fc287d1694c9203472aab1605311" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c880a5c153daecab82d685ea389ae99a" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "adc1ed86c888c59613b10113f405ffcb" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "4c888da63e452ff4f587e2ef558b9540" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "79e803c9e4686cd06734f4c03d112d28" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ea82a9e83a37a8789681e6d13adf3908" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "52f41d4dee56fcbd2fdc3455d7245b77" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6966f1e156c2a5b807bb5dea62ef7e6d" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c1f834af18d36f642a1cef6e71b8bc03" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d830dc880418f22830ee1a5a915b3fef" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8aed22941def6c27f324fc6d47ed297c" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ded3031ae1da6ca829691f69027ac21e" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "70f1d485b2419f0444dbd3f086894dd1" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d9e5c301cb1564449aa14e631dfaa475" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c39ed812b6d8153dfdb81096013085b0" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "47a95b48687fb85007a6e451e0262b7a" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "04f5eccd6fc1c7a9359083c9ce741d32" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c56cd5997c68bf2fd43941aafe8b62c8" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "e8defc26e93a7674da8e0c0271502800" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c09a16860320003b32fd5180dfad4e83" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f3a7e722bb4d3c0d62da75456a26140e" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "91606411d156c527d05e448d378de8af" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "51fa1d098e32d1a91837de61d5bcd9d1" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "140c1f1a1b5fc12945ddbd8fb9b1da5a" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "c6f8d0d859b403147032526e78a09026" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e5843382526d74b3aa84e7a861684e26" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c188c22a2bf13655a8bfdd6a1bdfc77d" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e605e02f36610fb848ecccd25ecaa077" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a04009fbe5d1543f1d65d48756ce21dc" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ac10a67c119aef198f9bbcd3aa77a736" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "f8a6da573200f7050e97f776ca362f6a" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e8d75b9ef425af0fb7e0bf703b2f50ce" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "01ac651fb66efb8a02388dc8f020dece" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "08c26336cf861ee618b97bf65344cee4" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6e1dc441b8d8b37d9e812ce5355e2ac6" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ac35e3716dca9b9169af230c86d953df" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "e84b69d4b26a07ba8472792656089bd1" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "242154bd3cbdce956f434fbb99b51750" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9db809b381879fc2bea5dcea84c5b599" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6f7f209d2134d8c64dccd041b1da7b2a" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "eab495af81d12e4e93c60f9bb0bdedb5" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "016b6610d18b0f5aded90b5ad4bcfb0e" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9310e3400801e1b24e623d9e7981f668" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "da24567e111e7b6e43f2159892a38ab1" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a0d4c3854e1712f4733876a78b209243" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9e2dea311402c46a2a632c1b44419193" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "87a159c824a9c39e3d7104e75b44eed9" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "39a78e2860bd20581870118e5769829e" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "61240dbf32c0c35de309e7aede033427" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b2475fba53e11b479faef7304c8153aa" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "21c05b439e96e6d938fa9a28d5eff5ad" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "bd47f3615901a24f757cf96fe8989678" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1be65088c91590c96cb3110a47644705" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a4618460f0242fc2d173e5cb18d2de13" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5b3f9bb7a7867c701c03fc251c01e30b" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8514c15567546d3438aee78ea99bc489" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "690123bf5b1ea221ffe37ec9539c53f8" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "e63cd136195bb5cb6e412f3a5410a5b5" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "55d587aecad647e83d69b070f660ed1a" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "254796819c1f6fa023bf7adb36d8a09f" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "68cb4ddae138212689bb7822e2e2d4eb" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a5629f982aca2dfb4efefe8d2da5db86" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "08eb15973342a5ead33c78bb4a960893" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3b5abc16a3c4df8dbb76f2994d151940" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "221809a84891fa21135a538bc576eaca" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c363e7a94bdc953dde19baa10600d7f6" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c24ad7a6803f89812155e51c5719d03f" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dfc7de9cf8d6f6d717be0d48f60ee8eb" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a917180b5f182fec73487797ec7bf668" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "d2cf2863f1efb20dfed3d6be5745c4ed" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2be5149baf88c0bfc869d0f4d3c5a907" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "5abf8b007ad0a073cb6d65411460ea64" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "77b61a9f0ac726c500e03f9efb2e9361" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "be94885403682ff5130cfc3df284c8f8" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ba157e87cc6ce6e11434b1039857e4f7" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0ac1b09723bcc8ae40cfd7f49f3ee93e" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6cc1ec7c1b5c113e3ce0d568fdfd53eb" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "366162c61d4daf8014f45182f5b31289" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "9f5e931a5c85a0b69c3c6fe7a839a1e9" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "4a0e7f5a36a9425baee190b005aadf93" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "62e81b91bdca6dcb0dabb981245d6262" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1d57319fea60b1b9a974082fa2b0305f" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0689c901e9e5c20a35607a38e95185e0" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7161bd3eef4ce3fe1349af12d1417713" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "cfc7e1ab67b675f07905478425db952b" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "733c81f06eaf67b99c6740bc021cc335" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "146ef7dce699bef330a8494a05d56961" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cf4f36468ee7b0915c3e2c0bc1af42a1" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dcf7f3ca48415b258cf89cb52379e0e2" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0ff40793800644250fedd7a31f560499" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "1839cc504dc10299caa46b6d8f22171d" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "da0457416978c7279c9f6de055311751" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "25eccb162fd169e6db3efbbf5d6d0181" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "03317fe275a5be067b99f4fff8b19cd4" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b348c5fa60d5db7a204e862d01f40b3f" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "bac42d3c453c9230769e20e49aeac3cf" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "879cdd060b5f6b79fe000d655b22dd4a" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "749f4f33c629c8419cd67c240b295600" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9d41720e4237de8da9bc185c9782e606" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9bfeefa03e66a2c5d16bce44d5bce41f" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "bd1e89aa3e6c3e8194e280204f574bc9" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c8cee9a947e53c722889e29044ebe569" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "7f76f713ffda86c3bf4d64853733487b" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "eafa67172b985d8a5a9b9c6222b94f28" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "3f6d3880b5d71f652e0ee8100bed60ca" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5152d69c18ce695ea66b8ab640f5273f" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "98e72dc92e03e58502d52a7396a4c5bf" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f7c87637254c143264b642c95220b219" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "db92b49a762b348b46d3f0bae1487aae" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "af059e4e9003ccb863dcd1e37b845ea4" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0dc0a08585f8ee339c3f694d29983224" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "dff5b2614a4e2bbb097453f8fa6f9c8d" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ddc17834fb59bf00d61570abe70ac8d5" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9073138304ac738f29887567448026b2" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6a200cf9495f440936be571acd49bda7" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "5b2c1ab4056a1b489866b239f86d7be3" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "ed0565bbe8c9825ef11801cee8118992" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c406fc819f51235c5ccb4b16f8c3c3a8" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9df8288d13d50609e92d183de20346a1" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b9f7d6dab45685f0716c610856d3eed2" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4c6983f4e3757684880f826b10deb7fd" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6c2390689011d0fc546753047a8887cc" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8530e753564ce4c2d28eff3141d968a3" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "429c69a02c3accdc5a1e8b48a1b57eca" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bdf328d9b0874ab200df66ac1131a21e" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "6b8ecfb8f300df0c920b86d259107a0d" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "82874e7e99ff5ebe72135ae8ef533618" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b183510ca82d6bb277c580118048f95d" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "0916410a1ee2a98e28238018e95c1f70" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7c31b6b167aeccdbaf06c7cb3f255673" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2fd56fc14890a9394d2f43eb5660d089" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cc957fb26636f964c43b31a08d4a3ff3" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "97768826e46dea244aa7db94a6aac097" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d93ffdaaf6e8d5edce9fea556a8cf401" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "91b5a518470d916f989000fb5e62b810" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9eb3ecfc951e13ab6ec9ff137b331e6a" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6068d2c66b3e7c8d35ed3b8269cdac0f" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "757e957f976dd5168c663d165416f8e8" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4f00b2827b3f6a50a6308cc62dd479bf" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "122f5d8ecdfba43372132fa8fe0a77ef" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "6a14288415654e9dbfdafee54fe1c48b" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "dc9428e70d823875392103b7e13ab344" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e9742a2a4a14e1c486d870755cd0676a" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "051b9b6464c1ce12ec8984453473d0cf" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "410d0fc325a3b658e308567d7fc0d32f" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e9e999fcc79da983fe21bc2b13d6f65e" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "8debe0871271d70b879c18e3c7edab96" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "228a9ab6dcdc1be14218b52b333814c8" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b379a8c9254c902e344385e558b1738b" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "43ac47c5c17655b6bd6277d37b3c33d3" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ec979b5411905929065bbcc8424c6341" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a2868c60358684e8893e7a9d619d70f4" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "478b736d2b9d84c7c8cb45d6586411c5" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7c44b1abba59da10852da5af41e8f466" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "867238431f3a09d67e013a644e904afd" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ce34fffb37f21c170f1b42dc98e97054" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e79cedac103e71e7f569c0f2d48da0c6" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c16adad34ea1e8912f6ac113bcbfc4d8" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "5d5483f61eb8abf50ce87f7087e5e561" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a66c2bb5d3d88608689751201b184e61" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0a5c483f9ae5cbe09c17d1ce8c11714e" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "406a0b47bb258d1fff1861ddf3b2366b" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2087f13437229f80b174536881160876" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9d5036d4d64ecc353f8fe6ec5a88c1d6" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "197a864c6a166af2cbed4d0a8cb7011a" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "5c8c0beaf63d6b3a808274ac5d83eeb3" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2b394c2fc7789c79206d4c71b6c1ded3" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "198965f740f6c8f3d097360c10e1ab8d" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "98d8dd62113646a3161e4ca5ed78a0ed" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "10bfdce80b81275bc6af48ed045dc43b" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "15abe0d007898c9a0cb735c2bb42a9f6" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "fd9606f664c7dcea44d56a200c0b5b22" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "60e8a32bb09104a30d606f63bbcd6636" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "899eaf6431a8b281bfd34014237b6691" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5f9ce702e655101185e77387c0a0c0c8" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "aa25723b525a7b4e7ba86347f4d44817" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "35f28c276090b28aea27fb6343401a58" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cc4e3eb2a93d0cbe6ede88692a5a5233" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c3edf2dcbb8ec6bcc62d23d4e12818c6" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "0a3e54e0183143d9da5eead8ba5e3bc5" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "54b894cb6f53e57b69ad271af728c7d6" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f946315d0dafaeb3bd1ec2169b6c3fc6" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "b4c939dca95521ebe69a27c6be739598" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "56ffd6eb7d264042e51a32c0aa2beb21" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d9f6b3e6e24a45e4cf3a329d00589ff0" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3569a5ed3c3fbd2e42f772aa0d7193cc" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0affbefaabfa3e7f528210634643777c" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "22344878eefcca78bf0b95c8b78348a0" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "cc9fdf86e68495f9deb0a5315b956ff7" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5501a278e467e694d634722636461b2d" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d12329d9c5da1a4667ac7f6f0b4baf83" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "dd62d217c4315c9341d32be925254c19" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2a280c73af7feaf998dc29ded47ffcee" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "975227ab0a5cb8d641a04426cc0ff26f" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "ce99123509da85eff38bcf10cdef0f4b" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b9607e7a1cffa1451359ec0664b2117d" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "de8aa501b6ee269fad4e2441768dc5c6" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7bcb730e14c9105e058db520f3f8cceb" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b21824b121688e1a1bd1c5c48e605359" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ca1cb8beb8d828a72d508e91542f5681" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.64.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.64.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "6219d4173217e73a0a985873dbcff0e9" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ea3741b73da0944e8d41fe709e58ee71" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b5a04f611a6487ef28b6c67436e35fb8" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6fb1ef25a44b2118a13680282d9fc1b1" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a95a50a32f3013ba2f8ed76391994789" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e187328d02180400e0568a4269eebe11" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.65.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.65.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "68d3b6f48d747dbafefb2127a3abc0f0" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "5ebd72bd580254db8cbecde89a7d1853" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "11776999b65bd21653c7e27f34522bc6" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "93212a04fe4b05007571b483d3f06261" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9ca5a7a3603e388c298c283b5672024b" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0bc56e751b4b0bbdcfacd32b473cf361" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.66.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.66.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "b6e19f48b43d091832d5f9973c5cd801" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "dee571bad8c7bbf0ce5f12485ffdd936" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "641bae6f3e2b32e468dfe0194435eb5a" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "45b3fbb8d97100c3ad365c025668d8aa" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aab9a746b8737b1afe86c6eebd0547d5" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f08e53a80c92e6fcb6b4053a97dfe69e" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2775902a9f956fb08805d645cc72d692" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.67.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.67.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.68.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.68.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "3af6177e749125b9769fab640b2b3766" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "04a5343f8d5e719609a2bb7f6ab298a9" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "754ccd32164e348e2db8788001a93960" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ba584db4b5a0499125f4c743070adb7c" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "aa7ef7e4daff47f37de99cda5b51291d" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "8cbbf02db90815237ac4973a23191fdb" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8b9e2717b4aedeedab8593c519d921a6" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f7d33ac8ad053914ead8e16f1c9f69f4" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3757b67e1a96afc553c7a9dc570a8fe9" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "49318eb5f9cd5938d050a8eeca1179d5" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d478d3e7065ffdf0fe124ddae56a63d4" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d758e8a2d0395e85007a3567988dc87c" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f194aaee8e58d9c8509e23d0547b1ddb" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e661babfeba2d4fdbf05aa37df6b2ef2" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.69.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.69.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.70.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.70.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "886dd619758f8118ad7e9f81cf167c50" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8a2c9ee4ceeef36b536094995707d458" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "cbac2c1f3d40a846e6f3c7e9b7b08610" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "3ffeb2be572b50271767d6f63e5a2110" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "18b1f62de4b98f11c5bac2073036cf63" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9359e22f897c90273cb8a030f8759243" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e14053dc2576c63f1dd961df770c80a6" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8cf08abfb67fd359c21be1e4cabcecf6" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b3f3e0a3ecc0d9ec1f63539f8ffb95fc" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "61b7651af3a99816ca8439f4709f834e" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a1a470511c6dc3b6284c7fb6bc102b68" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "33f19f4f98a60c7315757f12ee2c96e3" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "d59fae1f3e049c08606727c532b68550" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8cd60dc768edca699e42b6cc535bcdb3" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "de93646a699aaaa1f35ebb479bbb3ea7" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "df19e6cbc44f9bcb54203bb436d94612" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a53946509d8ce23866fb59a9609c250b" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b5e3ec51d902bba19a79f513b662c17d" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a9fdcf8ead7049539152a25f36c1dfd2" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "93e4665603a180b2efa5e78b150cb9ec" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a9ddc44486dafb7f818e4d2d2afba17d" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fc36cf2ea13ae82de753d04bd1638cec" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3e2f7bb7d52c705e94e47056978444d4" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c038166238a9b4d6464d99ed89feea92" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.71.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.71.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.72.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.72.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "2fdd466d5b16c83af0d9313af1e2ba7f" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f1d38c861a774e534bf4feb01a423f60" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "044446a47f98b20bbe2439067046adac" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "309da4b460fe768313eb0144c60b670c" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bb6693096bee31c6a28093a2d1ae5c42" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "efaaddc4dbb16526ec5a4d518d23010f" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "199737643fa94f65cb31b81791d574df" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6e872ce655d0dab983c0ff4effa1cf26" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e5931734d92ae6b9a7fea767e3757f94" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.73.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.73.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "a749617fa7607bbe906aa7ec74a3510e" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7a6c6b2aa7a3a746b7b8d842ff9ace95" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f6a1fc99c631b486f791d2d874b6c9e2" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d5b65824a0652d2abbc22c57bc7bd225" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c0f8e5b79c7cce481fb57d9a3c7ecfa1" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "848692f2889879918a8a48ac197e77b8" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.74.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.74.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "959fa36c7e102c739935545454afd69e" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "5eee16c322368f6fd119dbaa6f28d3a0" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "aa14743c28e26f4651616dd03ca820e0" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "784016bf40e77c153531cb0d25c23a81" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "22f85ad12daa6142df555a5cbe6497b4" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f622d137e07ebef6e59823881ded14fd" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.75.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.75.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "72f555f6c72369a80d7329eb9492e3b8" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "3c771959c31f7aba51fb15941b2ffd23" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e63d81a1f2697dc249e9987d7df1baba" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f438c0888927d64ddf33c9e5ce4b760d" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "658729854429dd3bfdabb4cc2afaeacf" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2c53850fd0a204bff97148a61bcb1ee1" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "164a970c3d03489214ce7cd5a772952a" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.76.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.76.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.77.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.77.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "3419037693fe1d3ba365dd35d3f0bd12" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0af4d30155b8ade5030303cf964a7dc2" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "19c0a329ce0e3a88251513a051b0e712" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "87a08a15f06b833d5ba4f05c67df441a" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b0436681d4d2e4a7a6fe0086f23e232a" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "1fef68a37bc986e7bfe2db3e24dad47a" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c653356b693b1539e9c13cd04d676655" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5e65410b87e2fc942ff6495cddf1f93a" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "816793a62489276212280df16016770e" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a8c65eaf781f9093ae73d98aa63efaf9" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ae6ca6e4cbbe9a752c656f148412e18c" + }, + { + "dataPath": "params_shard_480.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d3d841363a40ba2c6f20bf22fe2bbc3b" + }, + { + "dataPath": "params_shard_481.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.78.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.78.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.79.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.79.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "134167e0ed80aed3f0cc2666c7aada95" + }, + { + "dataPath": "params_shard_482.bin", + "format": "raw-shard", + "nbytes": 4194304, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + } + ], + "md5sum": "c17796e86941042f0df8587059ae5273" + } + ] +} \ No newline at end of file