diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,5695 @@ +{ + "metadata": { + "ParamSize": 405, + "ParamBytes": 7322019840.0, + "BitsPerParam": 4.500366420537488 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 81920000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32000, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 81920000, + "byteOffset": 0 + } + ], + "md5sum": "8d7ef6561545ce5ba7d9718c1e10bf85" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "97d7cdddc4f57b735d5b1f4cbc115f01" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8886da7dcca5840a6fd91cd8e7057cee" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f07ead40516c414bb30c1baf39da4a2d" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "86711329d58af09f3378c817ca62652d" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 32399360, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32000, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240000, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 10240000 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 10250240 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19097600 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19107840 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19118080 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 23541760 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32389120 + } + ], + "md5sum": "3fedbf835ef473d7b1267c684a3b289c" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5477dd5b09abbaeb4c923bd0498564ed" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0ebca477af696c77150917049278b1f8" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ff2815d5b17cbf0c007d69e3406b1aca" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "38c6c0885588193e8c0e3c71b6fabd77" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f1f33d1c82b51bafa63a1636cde290b1" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0d4f235b4e989c240cfbcbceedd6311e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "db8c5c86fa8de7d86fb7e223368a9ad2" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "3a0e833d9e6dd0e81c04d7b4fd02ad3d" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d27902cb4ee124ba9b1738ed874e97f8" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1e5ab3fbf35d7f35bd93d4bba91bc9ac" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "bae5a8ed37a3afb7394a7e63304b070d" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "f5e8ef411b584e9cfca9c54f91f339f6" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ab53cf612f75923c642d71f058920080" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a1eb82d39c17a6d042e0c939f4eae2a5" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "7416139905af443334c10b18234a8502" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "4e91ceb11b34650968b00bfbebda6eac" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "9528ba57b1cc73a2f06e5c82a12ebc6c" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3aa813f76c0246e12ba9653e1e2d330c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d6daf47e9927ec54fdf2b72f13e5ced0" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "b64ffbec3e505eb7b11d26052474b19c" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6e982c53a05e086c079f2df8f0d7cb09" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b011fb4a6105112ba41b389521af4367" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d51be98aa61f6361e061aedf81772b5f" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "dd7b18a103f90eca7caf35e32e2bc6b6" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d9acd101b8c5e00b39d496a57aba4df4" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "da7129d12ae5028520f4b948b07a5695" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "004502701ed469bccbb4c573ee8f5851" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "10d0686b21160db083677e555f57da8a" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5f5df7ceae0b5d80d1400610c45d14b4" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ef0be8ba079d632c33ffce3998af9adc" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f8a4b3fae78d1bb9cf9191b0ae1b97d7" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "6e3a2f6a4dd6bdb67258081f203eac01" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 81920000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 81920000, + "byteOffset": 0 + } + ], + "md5sum": "6762e5b629c84998f4071de0364f2427" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2660ab1c0660715885a956279a464364" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29921280, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240000, + "byteOffset": 19671040 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 29911040 + } + ], + "md5sum": "7c6404bdf70bf0971d5d19ca18c2728d" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5f27b7d0f803819b1842591caa98bf0d" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "40713a2e8b7d44633fe71765e4e2514a" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4918fe4bae6d8db2ded8ca458458d5c4" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "5af7cec14ff6c992161ec744267840e4" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e68cddc245abf3974a2f2d8a2879f7a5" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f73d77a6ee7f3b1455832983c66db244" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d95d0bae18601587abb13ed9a0c6bf0a" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "b287319cb75b08786bbbde4c468adbbb" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f78a52cb3586054efed67b73512ba01a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ff5dfe66195a73b0da36f2c79d637d34" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4562e3fe7f6f7f5f2f1da0da1fbd4196" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "9533ca3957872f8137c9987c304cc710" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4b685820655b12dea482a94889aa047c" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "677846111c734549accb35e3b6ab89a8" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "7203acc76df0df0383b3831cdbc4eeb2" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "94ec587b9272ce1208b4b8c62b8ddfe9" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2c9cd16e480a88fa957834c7560a5164" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1e4efbe08fcd67ed8a0f5cb93295c7b8" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e0b896e0bc3c13011995804635785b75" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "dd4c09cdaf0ba328efc122906f2234eb" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e07c901b088b251abb8d1b7864b1ba84" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9b09434ce0fd48ab2d48e28d466ad4ff" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a82788f7dede56494e825d53786b8591" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 31303680 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "5d9af2cd538181fc88f9b7a925660e74" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "45f912e5bef405e540413c69e37b4860" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "95b426a93d5a128bc151618d800acd39" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "295d36ebd3e0f0514254142b2edd74dc" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32942080, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 4423680 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 13271040 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 13281280 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 18196480 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 31303680 + } + ], + "md5sum": "6bad2b725dec081a4ed9fc3af4ad0517" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "91b4db160161aa36f0ac926ab9ff0583" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "344506b58cb9d0c3f305bc79754772c3" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f583f6c242cb6a072db1e653cafaadbf" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 23121920, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 4915200 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 4925440 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 9349120 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 18196480 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 18206720 + } + ], + "md5sum": "44e075c28a7bd67fa790e15c48760557" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "47c573ba563754b10243637116ed76e3" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8b81df1a6ba6764f98c4469f85518503" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1bfb00d532f27b8ae8b5bf1c5afbf95f" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "56d6175c052facc95c5ace5da0236cda" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b11bbd20ca40ecd0080fa4b483dc783a" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "64aefef2013b075afcd3a3f98caebdf5" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5f7fad8cda5bce4f4116a30014ef1079" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "ac87a4e7f7e9071499344d7d1cd30123" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6f1a72e490622693114526c8bbcc63ff" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "610252c0ee83d5deb2cda4f638b6a82a" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "26daa45ef15b0bc16ef9712a8cd1b1d7" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "2d4f3217af4f5da4fe02cfd188a48111" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "66294d58b2a4a895b102a61409f4394f" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "52807cb149f32829fb32a6a16c64eeb9" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4da20fb74adf550656f377d4aa22de7b" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "7cb5ba153e50646fc5aebd0ade18b66d" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "56f2d0018ebc480d21addf03176bf7da" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e0e5d0639669927fc4fb372672be755d" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "23cff83b14a992ca0a41878d61bb0a6d" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "28b08be164c7574335ae17db53ddb178" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "99739f8ce77f183b211af0f17bf711f8" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b08bf0b3d7338be77d2f2e3d34a33afd" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3cba302c1bed5dfe9aa24a64509ffd24" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "d581ad5b6fbb0a0b753969b0a96efe21" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "441b1e5323104859b921288c8af43734" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "97dd72c4a3355300c207296d7a6e4070" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b891c162de38903248a0bb0f99aea136" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "40e8cd9b992a8cf38858dddb3a3821c9" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f2a95bb7cc014af172ccfc88a2d4a4ed" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "81561e9a074055241e0ea0a1b692809e" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 28037120, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + } + ], + "md5sum": "663c03eb360615eb5c42f0568d4c0b9f" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "610ea13a352900464d7bd345ccccac20" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "20fb13d6d0a908f4df678e05656240e5" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6724c58f6a91bd2039c043b9d71d8ed5" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "ad62ceec7d62e517d8e1e7ece2fa7e0b" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "0841b3423eff8317ae13b384487ab399" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ebd1791fafd78d43a3833d74868588d5" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d9d2859bfb4b8b9734d736e4abbd753d" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "540450b54811957128e65220b4d75e50" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e4a31bf78e452deb26f26308fe60feef" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7c4487daffbfd068f839938912ad8de4" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6c66be7c28a5f6e1e4a9910f3e44b3a4" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "e9d8e4b44f72c91db739508fb50cf991" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "45ce082c761d5e2b73ebbb4586019197" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "06f784a9795e571b728484832727766d" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "186d49ed81a218b620a64a581ee29acc" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "3452264349496cf55611785024023721" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d3721be9ce9df09048b80cdfa88d7206" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "70af0217dce8c6c322975d744541fbe3" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "bad78530bc8d10e780d4784277a65595" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "53ba40ac22eadf87bf216329599cebd6" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "10dfa4a9e4abf3fa8e09d9ce2402b6b2" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "05b85518c439b002ba21e577602ff5ae" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f8b6eee5c93bbde1fb951e806c372610" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "f3ce4771badaeb7fedd4f0ab4b6698b1" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "05f1930cf990578afc56bdcf47ee1f17" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "09c5669de3a4635248f4b896c1e5e7bb" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0c32e99d7cb54cfc33ceca8c8fd529ff" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "407f808d246b4cc654236a5d0acc4aeb" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c15c8d47c8fb33055114b33cf49af88f" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9a98ee0def42c4f0b8db864cb59b501a" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "08ae3810fc499a9a95fc486562d8e7bd" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "92123f31f04bf70518c0c99f0736e7d6" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "47c2cad0f3a71f5d6581c2f4b0ea9f31" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6a182ddfaeaaff6de5c5c1f2dd605e0c" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "983af08936a7859755c718f0be46557a" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "d84a780a6d97dc917dedfa77ac535cba" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6e830f4e12ba391724f53adae87961ba" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d6e38a8aeb4ba6bf203327fd2b5d8a8b" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c74543ff4a141c23aac9990364949da5" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "e1bc3326971c38691a5d8b4618f94bfa" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "614703c470b53567e08e5d21944db22a" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0df658ab11e9400a0c0bd37d3384bd99" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6040b7ae6ac74fcee265d0c4000f3de7" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "aa509cbdf7a51c3ca1c2af52eeb6eeed" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "fa7af0d3d04bd3c6c506401107d0a237" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a7b6b820792a714758ac62a12741c16a" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e8100227872f46dc6fc4c3a8ae5bcd42" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "ee1ce53437e67517931aeaaaa01c8044" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "295948bf56c5cedd4ad482d2d88e2674" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "23fccc7d2786271c0b26ef02a8092ece" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b45c4abb0bc2d226732d15d658531914" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "08755f97e55382250acda2f6ad9c6eec" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f56e3393e68ad98c829276cf6b82d39a" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "667225c113f2e78caed328a81c66626f" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1a66b3e43da31f0125cc984f26df56fa" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "df410936b2f35d2c65ea89d7f5f9d8d7" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "662b72be7995675fc58246637f1d2bbc" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c6f23d835dff1aad4d88a1c55584e159" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 24084480, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 19169280 + } + ], + "md5sum": "54b30a0e0a103d4b953c88ce02be5448" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 14745600, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + } + ], + "md5sum": "0d7d30c1ebd348cc31744afe9a056d5f" + } + ] +} \ No newline at end of file