{ "metadata": { "ParamSize": 709, "ParamBytes": 20481200128.0, "BitsPerParam": 3.644827456917361 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "474cf35921f4a698f1efd3023739216a" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "5c419e10d2fab7a700286833179567fe" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d45e4de7f0f119a9e253afd43146b760" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f099952644deaff90da29d23d4e9bbdd" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "57e2b2694d1f5226ba669c976d09f975" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "90505e9fdc2e480e384f43a84b1da842" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7d49e8eff43f89d86e1428c293517648" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26583040, "records": [ { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 10240 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8857600 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26552320 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26562560 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26572800 } ], "md5sum": "59b0616fefd9daeb65718c60d5a5e57d" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b6b390d2ea435f9e2fba3cf0172ae529" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "af2e1cf7963114262f6fb577839887dc" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "b2e803a0bf18b5edd9a5744c91973793" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ab97664a260268f33f872ed6ad163e62" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b1aa007b21148de6a5a8257dce93ea91" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "970f898b8dca3510f62036d37225f142" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f4929aa128b7434e9ec143cb3bc7a6d4" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "7360c699a477e9f82feddb0904f6ed3a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2ef5490888812af08ca35620b0bafde7" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "330d9570b01155401cfdb626eb39d1e0" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "896dae530ecfca1db76d9223759702d6" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "251f7f6e9225f455b388f18688ec2653" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "2dbab84545639011eba85b74b84ad7e4" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "07a4f0cc989705be7ad2630cd91559fd" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d6372e2d9ac081603b8ec719e52fe0d6" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "dc088360d5805820ed4826b78b637287" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "db3e089a68299cf5910f04e835c8b222" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "02a5d06f3b4c0b6763d34af08fdeea83" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "70956b0a96bd00cec4c42ae688964c72" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5b137241c546b3237880bb85a18a26c8" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 28215296, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 25907200 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 25921536 } ], "md5sum": "db44576e1999e1b74d978f038cedc3ca" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2f59233fceca8d60f511457487c683ca" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6f3918fe001fb1e93f639b3587c653ac" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f59b6286314d9a838eef95e663b0ba88" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4569191077ea8fc55436a8d77a4afdc7" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "151e2d5457178d5fd993e224c991803c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a6fc70ea36586453e02c7b6d0c787b76" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c6951eb9622b3c2a2dc0cfd5f84c6734" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f18430518757a7ed9ebd20f6960ae00e" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "35dbd0668a396c92b4a3ee4f0148900d" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "d36d80d5a4f55abae000ce3d1c1f9c10" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2075d3978900334e8131344b7894c7f1" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3574b5b379569d1f31cefcc6c383a82d" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "a1272a7611fd919eb4733d4bf55b1590" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "59422487379ba39227995e2b89777c88" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "39f7b7f3b4b27a2a3ada75e548801fab" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f51bd29684c361e1f6d0117441584848" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "754cef6bc5a13873b3bd2236304476dc" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "53782f31534ca2a85f32d908c2152ed7" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3d75c3aaf2ea2fc0fea4cf5db9b69e0b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ac1aa33ef1833173858fdec322e95b04" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "db5a098c2fe709004b8878f404b38549" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3c1a9d32c229bb044649e1efe9d0cdb7" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "54a58c920423ccf173abf7be6183cf2e" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8d1db6a18e02120c852f72624ee157b7" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9c64530f351d20bae88f6329fa3943a5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a33eeeb9901c55a7f694c04e6c804b99" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32495616, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23613440 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 23623680 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32471040 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32481280 } ], "md5sum": "1f800d37a0706c2de9db9d9327553f4c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "658437bd0acda3d02683fe3bb3fb52bd" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fa74b789d06637dc49dbd64ebc8f2527" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f6a5bec08b363fe6570cac8fa087e787" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "51958fe0b37ef6b3e26a291a9a2e5edc" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 28215296, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 25907200 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 25921536 } ], "md5sum": "fd8c8560e3921d54d799174bc0674e68" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0d87463f196c71ec3a03ffc7bb9a62a0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ebeb473494d3351282aab190f7b45f49" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c1e0ef676f50aa87f0269e2bf12e3cd7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "826d08387cc0cadcf62731d8b43c44b7" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e0c9abf0749613580e5f70a41d1c3c7a" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a39e106e08bc31cbff36f12af2eb7cc8" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "19b9e61eb74aebb8ad7fb37eabeb46c4" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "75cf73e6a83e3743426963ae3c9c20a6" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "24c3e3761f79878d66cd741113d05f3a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "acdc7a6d242b0bf729f4e097e1c311dd" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9f6ab4e17e385ff3349f9d8c7b6ff018" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a60a7944045d9a3ed3df64e3b1c2ac4a" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "c82ba35e47df4d55eb9b87b3efd0df2a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3896e66b1ebb456361df46f328d4b1ac" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5a64b517566893eab824dbe74e104872" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "b94a2249243f305963b851cb0db0d8f0" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5f8d6c2db3448d74f1935a4e1516ec64" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b53e1dee704cc3e591e445d842cc88f8" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "3cf57efd04315023166901488ac54963" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "60a25a50b45a2bc1fb0320399c761ee5" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "db324bca265fc19c407d5d581b5b545d" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b3d1d322532ae7990f8f06623d7015f6" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f0be6d3836e33c80b161c7fbc599d31b" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "711c5c47030ec68fe975ae181a3f778e" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f5aa304de7b645dc7ebeb7aa8deacdae" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "21b723f30946b849e841996ffc327b1a" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3cea37ce9cb964676287d202e2b8a6af" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cd6f6aee2ec795af2b4204de7174959d" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c717d539550dfb8e677ddafc5662a529" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f6df2e8aeae22077a4d5bee04b9a88eb" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "86faf8c1718d42d6dd9cc549b7bff6ef" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6c57d40755fd1a0c71b9a8fda57c2038" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "58aaacf9049033c920cceac5890700a7" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "8a1ef9e55063e735d1b6ccd7d1c3a507" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f3b5460b06f7f927d699906433ee89b0" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "492db42912496705ce1557706d8af5f6" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "7579c24a1cc4940113fa11a443c1e840" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "22f5fbbcbfc4ad275b4507a0594e5357" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b1340b8f1b77c9d9285e1cb9dd6e87f0" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "238c44410ff731bd9461c645b913ac1f" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8ae7c761ceb6efb091413a18fa5a7867" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "80aaa189232b3580f39f6aa6d5c6bd37" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "9fba12f0e106c699267997469aa8a320" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2c749ab6caaef0014bfbe5c6c7962900" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0f9ecbf1469014f89ad8d807087b842b" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "a08b298466ca3f33b24afd4571808aa2" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "99119024e2e7bedc6afc37b89e47b286" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "09a78fe26541fb2af4499dedb48fb8cd" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8a743cf34e28a98abd68eb9b115d0ec0" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b5269cadd81ca49464c5b5603a508550" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "137d16676a58583910d9f701828aadfc" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c1de0778b15cf4731b0b09b00de1785b" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5f36bce8c86930a92358f6dd821e28b5" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b7dc6524ce3b85d0c2d8aa92b70d318e" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "77f9a80b21eeeccde1cbe7090d230719" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f6ea6ae0a452da1f824aa079d1f4e7a5" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6fe836f0d5fefcc5551870fb2126091f" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "b9434cfb80724cabfd9703f0a7391cd1" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a68e0d06698c042bc51e6fcb4340ea6d" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ee8d5cf73140cb6bee3fd2f926e81470" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "bb704cd48c1dbd461932ed83a1263d40" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6c0cbbb1d8b13845a35fadfefdf59c65" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5fafc800f14e408bc474c5107ff7f1a7" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "b919b2250f524d9c52b29a769ba267f0" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7ee4ffa72343b10c1d571416659c27aa" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0bb4b55a423c5efa131d9c5e5965b7fb" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "002b34d2633f84f724f2e7c161b2ce29" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "40f7c45fbb4d287bd7f681a1f754eb83" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7ff3957a437f0697f444e83dc24fc3b0" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "15dedee2cb09796bb235afa900d20793" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9e842dcdf6f47bdfefbc955b861045af" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "28bfc2630cd401347071b9e73f90fb15" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "23bca087ec1bef7eb1ca8aafe20215ca" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "468cf8a111f9d43963d7fb0eb59fb370" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "59157f787c21f161e82e8537d38d53d4" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b10352f6f9ea32f376fd6159fc7eb298" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a0a6fbf4f5f7350b1be1c90d2202b564" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d6fa49fcaf68c7dd9c34bd75feaf3dd1" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "de1a62fde33efb58a2ee483d062b904a" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "74d659d00a76183000642f04bdc4db17" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d945649937231041b6f8fe83be3a7f61" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "9aa80a06c5d88b1adde42bdaf933f140" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3c88833b48306e500f9f3bf1adb3eb02" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "81da7116cbedb0c398c3b229217916c8" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "d12bc8093cab3f48eb62e1f45534e2ee" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5569687627e500bb6e27664a5193b4b1" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e626d9d7fe6a79b61f26b1036f0a543f" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "9eafe82fb908682a40f43d0d56ab6f66" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "be449eaca184b62643424b4330e1da9c" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "42ed31ae8874b5079f13ba54376083b9" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "36673864f7bd2a3027b854b4277f6e25" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ed9dae59da8c8243bc33d9fe1f6fe827" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1844e6db8bd37633d4aa971f61265225" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c74fce8fc07c6c247100fb1ca1253928" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "58920d63c67a4ba9f6451dd80e79faf6" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a642a191c91fdfd169c88ff3866a04a1" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2c7f5cf5911559bdfba8e82dea9dae8a" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c4f0df22713b7b9504fa10f5f1662902" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "8c433f216871e1c22a5ce193730ac762" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "50c720fb75583e22fe8dbe326df9ccb4" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c51753abfa40fa03d48aef7ece43e13d" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d34dc269f27b17e009f76583320b6353" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9933fe01050d4374f44d6567d31c819b" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6042497efec341607f91d1dbc813a870" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7f3b04d3c77f8cec775be59dc71ca8cc" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "255d4f21dc7260421acda108fa893c07" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33110016, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 14745600 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14759936 } ], "md5sum": "d5e92e647ca362c0b238f6c03e43ff7a" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c0b767259883c0a14ef272e87e5338a4" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "95e2eb687503e55555b6ea7ca0282b52" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "ccb5fdf69c897f2d46bc55b4c5c185d7" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "35af7d12d180c5e901a1f0732edbd0a5" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "69723427eb33003e0e51cb3c1b31bd14" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "252091ffd1c06c71be41ba6dec923625" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "004db4998f48b618b35daf40e23e9650" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f6ea1e19eb606c07b0a80b35e3db84c6" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "45ee40609e725e711368d90ab1415439" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "66ad1ba3dd2d08f1379c289403573100" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "777ccddc5ac391871528512a4a6b5293" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e65eac398ac72ca3639fe0d99f7c4225" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5b2de47d73d47f6b7be06553b41a7743" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "fa52a169d099ae90681ff338e516650f" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7d6e68508685382ec193405a1b33aa06" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f6c2d6ce6697abd5f1d3b9d56fb15e20" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "46c192c8f7dc50f53452520d84d1a6d3" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ba89809265ffd62c034f73f8da3e0706" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d72ecb9f1a778bb39d8d5c6d7d11c61c" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1439d0f9788e31664f1ed56e854fcd0b" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "7577579e71fd80042ad98670c7c92ca7" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d95b57e323b13a2ef695b33e77ae959a" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c90e600e4a6fe9566d87d864233f905f" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "d526f5ce987cb120e8ca6707f2e8d6a1" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d1cae5826378bc01a8f739f7b0149a73" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8ae479cb13eaba198c17e7768a891b4e" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "3ba011c68af665131d23911f96d01da0" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0dbb6ada93ae302b3581d7ab8306018c" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6c16295e564cf0e7d86e1d0d5c448d5e" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "6c01712ac0276a74c9df9b70cdff9c05" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b35afdd322e86b8c08cd595b5b1b9a9d" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "996e266aa824b6d5d0abfa028a4575b0" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1b375ba6e86cb5ed84e29ebbc3a23823" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5fdbc1b2bae6a02c56e7970b04bfaa2a" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "cbe2e735a0cd0ee96ce3740f657fcf52" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c7d83082812a8136284562e7e8be482f" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "70d294267285411eea04cb7d73d90f68" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6e46f6f532e3c1274b7487287d43f513" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3211c1d243bb35dab7fa0a48b043466b" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c17f77d59c1a1c1f374fcb0144b54dd4" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b5815478deae5efdb2f7d9509f9d0b01" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d650a7ec3a07a8cfc8939d3761910ec5" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b006d8c6d39ebf18f989dd11e2c4704c" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c8ed2e0d7adcc7a823003163824e4b80" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ba26154f7b35cdba004a2712d0b51d2c" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "87879418a8c85471279f1d14223da5bf" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ec5c5198a73b3234a7073a5001a28b43" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "3343428722209ed709b5af4f3144d4f8" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "66c8465ba97a87f96b74be3d94d7f564" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5ad0c86cbbe9169bbafcf8ab85874dd1" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "db58575917b266e6f8e4c80894e40576" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e755333fe08f40695bd4548a9605ef83" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "108be907978c3088ec8f86f59bcc4722" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "4720a82dda1ad1ecd8b3ba20de6ff44c" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fcd72a3a5c98ebd460eb11afa47d9360" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6660968b4e9a38bc3b359dbfbf94cc21" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9a276f5ea282a98389de3f1ebfaf8287" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ee30d093f753c9b8746b886758026b27" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "33eb8c490aa7e414b4dc9b4d1eaa1209" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "eefff15e54b6da3816b045648ce5310b" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "faa64861be464950da1f9528c19c2f19" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6a055af24dc11e6d0c55d7f0647547ca" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8f880893d56b7e5ac37b611b724ee2ed" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "65cc9a4fafc512dd5e277804a0e4a724" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5540c71947c1a85c8a9d7cbb4d2dc6db" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7a0497699534a410d9a798ba8e5fa8fa" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e42f1e894b53e4b624944172468b3683" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "04cbf86c40530cebce2c1ef8e7273207" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "19696c9aa2733a84381fc7169601aca4" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5115f42cd1f9fc1749158f7319cc6a87" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4de0f7f3a9e90c9f446e27af5520b673" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "e09bf32bc06d6489fe0c24ccafa795f8" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4a1a056b79807b9b47593f3f3849b58d" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "79a7b3ca1b4460aef033a97e6aba1893" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "e604c9b0523b44d1b6db12b49386a806" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d1bd2c63c8cfac9c7cdf5ff91a962d6a" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "953fa57e025850182146ad0b8ac6aeef" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "c8c221cd0dd7e36e41779b303d1f2741" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9324ede2ab41ebc566476e9705bce842" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7f852204a223ace71f9bb3a2e7816c2d" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d76f4d7be583a3b720c1a4191da01544" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "fd7cdfbdb46af5dbf41e12208dc737b9" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "cc69bbb39c00e9217f885bbc73c71926" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2ef7eb6a41cdc52963341dc08fc35355" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7d665eca2024535b7852a5dd2d28c70f" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6a3c08b9ee9002fba3a4b8d57e40fde9" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "80791b10b536bf2334fdb760beb06edf" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "922847a1fef8715811604db80f79e447" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6b49a10e5bb5be45d10d30033b97f2cf" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fbc8e5d6202864f59f443c67c51921d7" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "55c17794cfadc417b2e02c8db739a970" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2f3d23715ba663f1c658d0bd544a409f" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "2610da571b63385cfc1454a4607586f8" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cf76d65025c760c15cc1b4b986a95f55" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0bbf459bfca485c0fbc16eee2e543871" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "b22aebc390ebfd1a8dea537186207d1a" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f4db3aac649cde63c0d3b371e380f089" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ad5281f5a08b9a608a176d272de05fa9" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "a886f47606f29f5308ef8b76bae29445" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f7ff04f68820cae790c19d4b9ffbe4d9" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "51595640cedf7332e7220556042a942d" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "39549857e62db1f5ee64776c6587d565" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d03a90b5a8b1aec14ca9c6c50dcc25a2" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fda1045412ddae142e915f9cc7fa193a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f51ed539df7a5f7933c9ea6213ae2aa6" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e2e314c21372dcf905e020fbe054e3a4" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "cbea7ce1b1ca7d7da50d1d7598d37df8" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8dc9538a85302aba341d57db58b14c19" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a6d1a544bd45ed81bf5e30685e81dfeb" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f9899d15fc3d1b970d5c41a2a69afd2f" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "19e10b9e21dc6fa483cf02d3efed7144" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c58131094a6af3ccbf97fd5639d222c8" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ae8ca4554573b23bcdbe56418ada72e5" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "008656dfd04eca1eed1053bad0ac602e" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "97a3b5a114e2cded562d0abac4c541e9" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d20db9d869c22cb1300739d46a14319f" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "b8ce91061b026ba5babf6a9220252b2c" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "79f150a82807c8d0c366eb78d2dd10ce" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8bff366720197f7ef436c34ced85cbeb" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "12426f291e41807256dd26cf42b75eed" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a06f97dd17eb9d6134d63d237e0327d2" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1cf743137a2de56fcb58e5bcc4dce9a1" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "0291319e51f6b1104d534158c323e72d" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "011e248479d732e1d1d67f4385b5e656" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5bd75190cc1e60f1f9af73273ac78941" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "643ef38c77e346e9c9bfd82c0e5d8d86" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8272d3db7214124216a1da7ae8813848" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9c6071f5b8f748a212a48df1b91fb71e" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "aa2ca0be93003f8c1bd010b4c4937d58" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "191040523f3921da9e6ff79685f558e3" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "3aacf7ca8cda7efed4d98f4d00043c8b" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "aedcbf793830a61937a79c915224ca60" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f33dd19447af7b527226f3da87ff40c9" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "aef690d2ff8ee3ab9e06cdbabd55b31e" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ba37594337bcbe24871d5ae685ad9057" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "daa90c3c877001ecffb1204c59b51bb5" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "052a9a1ccbccfd5a5b9d64f13cdee5a7" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b47aefb1a9a03c87c30f0637d222eeff" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "041484322413acba834a22d6dbf3d7b9" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d94024b71be07b8d5f06d51f59567e20" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e0a600430f883a49b91ef83998982d3f" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33110016, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 14745600 }, { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14759936 } ], "md5sum": "10c8b60ad7c2f421ccbc8dd996a4a3cc" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 17039360, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 15400960 } ], "md5sum": "6971b2f4bdb3d80c83e473597bb127ec" } ] }