|
{ |
|
"metadata": { |
|
"ParamSize": 323, |
|
"ParamBytes": 9081201152.0, |
|
"BitsPerParam": 9.046979540559027 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1050673152, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
128256, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1050673152, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6945d49d5d076ecd0be132b67ce3cc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c79f9f063890532332f8da2589ed0cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1050673152, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
128256, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1050673152, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "443a8c35a9ebcd27107a962a6b5d2e60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb18c864a5537cf1c820595a3c8da431" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "859269ff14f85726d23bf3fad4778dc0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dfa0102c5071e96fc7b3fec573a12fc6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d980765228cff4bca8d09d9e1bcebea5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "49190d9e24712de781967667600e3cdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "acc3232a20ec7475890b94f02bd6eda8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd7979ad65b31f1489b5b4c1749389ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd1f1a497ef1a285b39a91f580f12a69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37573df1ed1baa7fabe8238cf193281c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a25453505d8d92f0d91d92a7ee58a6b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "555c55e65f56d6a98c687a7414073293" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52aed4740d128eb93fde928489476aff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "efb95ab09e911aae0d10d411910ba789" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "820b8e068f99a25c0bf6961000d614b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52690d569efe271f6e10af8b967b7d16" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2a014dce929e00158b40a35ed78eaa8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50fe929ef85eb45869fcbdaa485a26f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51edb418e653f51e37388c92254f1c55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "94bc4dc51bde779b0b5837bda6ef1e48" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a5d569a77a1e1a8631ce269965419f4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "004b0e6aa9e2e6164d73ad4a86ebb7b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "394b11a4bc9cc4f5c82f100516702300" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d37ba16417c015e75f9c0ecb41f99916" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a400e47ce9f48128f395716498b56488" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07c60265438aa529bb9d49f9003dc8aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b05a33ba8679e50d121a19296ef73430" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe9fe6e39b3daf7e766a97b0accac44a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f937e0faca4d2967d4dcf30e5ace1d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c5676e78a7af64cac3cffcb1b46be80" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0c30e46622b7a9b2e4de5291130ebdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e431697fad371f03cf02ad332251ba2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1aad061590af2b2ea4d96f84681a1ef1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f4f03fe97da5b1c2911947db1141bcd7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e6d7420d843ab6310dc6fc22dd1e5efd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a562f55f4556fb4bdd28bc9aa03d9f63" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2aa280429d1818f82eeeefed114ec43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d1a28ac62dda3bf7d3f3cd0e27d94a5e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "31e163ad7a68f16e96df71ad962877d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65958024e515f228248504cc4df04a19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "16d9cf8211d69418089a2bfef3076680" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b6e03ee6aff26fa893805f1cd992afb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "529650f6598d120ccc5b73fa83934488" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "17a455efe78cc00cdfec8c902227d577" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e20de2b22c9b1d0d49bce144b2d465c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "717099fe6f5453f366c7dd88bb852ea4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccd6bba8648a350d9e02570cf4962d1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fc750adb8ae849ac35ecc34d7dcd5816" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "61cb9e3222eb2a69d3fdc30874efdd27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b67a7bb780f7237681a5c744d9d96c78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "683707e940ecda57bb68dc6842765d13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "346c3396cea046ae89931bf78a3e1253" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "379c00addbb94e2a32c180280580e84c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0c5e6d5762b6b00988d11091d75e0885" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf5a15162ca2ebfd500f07450e29118b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eee0e77954364fb92068d470482d9558" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e28e8c0fbd38a722ec1ea8d52f6d670" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8cb44eeb6e8d9673043649f7bb6de92" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "afee385d7a56db4ba16e2d4338ee4c42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "92140513fc226e5b04ef8b1729afb6e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73c41a3ba5b79c9c3d4bffaffc8f29a3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "16f7cdf21b686c7e2e7bdd7865b63f06" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "796f8694234c10cb7f70efece86d12f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "131e383fef78dff08e81670c807cade5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5060b3a75c831a2400dd0964b0581b9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee0b69aed4827b160aa43e7c816d7b89" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8dd0f8ca7473f5ec14b8becfe8b84f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a383af93328fdfb651afc7ca478185da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26c5a94d72153d59e2882bdf743de499" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2612845126d3c9aedc64a130c41f4ef2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72c1020f23578326238a03735b7966ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be43f2dcf01fe92c16c6fdde4578a332" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0165db642c3924a116f6a1401a5ac637" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f281f74f05ffea2375765db33d8696b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f6f92f71f61ccc478e623d9a8e9b3ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e43d80a678bf4e56896ead72f276bf32" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "392c740d765ea4c96b9493feb37fba74" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d64c984973fdfd7d06e351c3cb461635" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b90407ec2dd6100382bed7f11b003a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f548ee06ba954fc568ce8b75ec87ed19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b5af8e2a16535b06a26ea71acaea36d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce4443881a0e04570a2cdbd2b6faa412" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08336bcc0492b1b98db7db03ee54e849" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df80a476928605e6acb9440e4dba49be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "324ffd6ba8ef1197b271ae257d94dbad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20fa4e12dc4ba27bc856308cd57e66d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ce4cb53f657711729dcc54ab572ca6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d9010e8f2eaff4e98a2c5043377c6ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3fc4111ead3ce25a870e14369893dd41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b7d2288421481d15a3855013539ea02" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ca4f844016c03231ee4d3679f6ce853a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cffe2e7008403dc9b39358572a05e049" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fee8e0f2649a6a6cb1ecf90b5ea0274f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "24a3d52bf0acf242430e94565ad89790" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "305eb9b0055af82cecb72a2e029368cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c9b4061fbf46d32fa71e56b5934da53b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d00551ddb2e5c5cf81e5b1a8c45f260" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0deed8dcc8050abf4a6fe21b3a3f215e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a354b3317c7b283c62f0a25ccd24366b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fd90ff929543b2a40f379a7b6a195c79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b98eafecb641d082b37e528be46634a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14889a87af60d074f8364af28e2e7d8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b8e5af5a33f2ea3584418f33a50c3ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "baa6fbd062d89c51d089910495901dfb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "328decc74d93641fa9c16bea7e7b095a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a769694c6b15113a19d0983f1e05d88e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fd837876a11c1392d04ba56ce7538d9f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "734d71cf51c0458231752747b7a1f17d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65d2eed328271f325175167fe878cdc7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "835f10c5a785548747edf40c329644d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e701dd471c50a60cbeaf9751ca3c184" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b79490047ce9371df8422d645af64968" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91d8f96054db7a29034da491f2ad6eb9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "703daf772565209c77c3d6090d146782" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19628496ce6e0431aa505475391b4f33" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee77166a7802d504f6d7cac33c35503a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f47f1c1566a9248e344fca90b7e2382" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37f4ca216ef82f37946cc70aea85ac71" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b4abc11c84f4485441812223b4155c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "464c942cfa622474a66436bd48b56e0e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d6c08465157a49cbdd9845bd5db1bce6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b3d61740600e64883b6bc37e17072bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f4149db52ba940dcfa9f66a8dff8ddb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e45a5a68dca017a35bb4b54e61fd367a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87d68d6aca79f1e389967ae28d4865f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0a167a49b8d2e0ab3febca1d8a818ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25698560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 8192 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8194 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 16386 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 24578 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 32770 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 32772 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32774 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "e4m3_float8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 40966 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25206790 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25206792 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25206794 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25214986 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25214988 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25214990 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25223182 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25223184 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25223186 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25231378 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25231380 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25231382 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25239574 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25239576 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25239578 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25247770 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25247772 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25247774 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25255966 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25255968 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25255970 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25264162 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25264164 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25264166 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25272358 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25272360 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25272362 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25280554 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25280556 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25280558 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25288750 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25288752 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25288754 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25296946 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25296948 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25296950 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25305142 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25305144 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25305146 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25313338 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25313340 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25313342 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25321534 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25321536 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25321538 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25329730 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25329732 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25329734 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25337926 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25337928 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25337930 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25346122 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25346124 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25346126 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25354318 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25354320 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25354322 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25362514 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25362516 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25362518 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25370710 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25370712 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25370714 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25378906 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25378908 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25378910 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25387102 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25387104 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25387106 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25395298 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25395300 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25395302 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25403494 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25403496 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25403498 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25411690 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25411692 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25411694 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25419886 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25419888 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25419890 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25428082 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25428084 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25428086 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25436278 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25436280 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25436282 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25444474 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25444476 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25444478 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25452670 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25452672 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25452674 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25460866 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25460868 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25460870 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25469062 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25469064 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25469066 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25477258 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25477260 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25477262 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25485454 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25485456 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25485458 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25493650 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25493652 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25493654 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25501846 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25501848 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25501850 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25501852 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25501854 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25501856 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25510048 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25510050 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25510052 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25518244 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25518246 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25518248 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25526440 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25526442 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25534634 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25542826 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25542828 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25542830 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25551022 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25551024 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25551026 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25559218 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25559220 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25559222 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25567414 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25567416 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25567418 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25575610 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25575612 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25575614 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25583806 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25583808 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25583810 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25592002 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25592004 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25592006 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25600198 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25600200 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25600202 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25608394 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25608396 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25608398 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25616590 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25616592 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25616594 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25624786 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25624788 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25624790 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25632982 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25632984 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25632986 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25641178 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25641180 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25641182 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25649374 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25649376 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25649378 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25657570 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25657572 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25657574 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25665766 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25665768 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25665770 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25673962 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25673964 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25673966 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25682158 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25682160 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25682162 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25690354 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25690356 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25690358 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25698550 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25698552 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25698554 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25698556 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 25698558 |
|
} |
|
], |
|
"md5sum": "a565ee54f0509be91e2ae031ef6743be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 2 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 4 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 6 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 8 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 10 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 12 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 14 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 16 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 18 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 20 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 22 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 24 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 26 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 28 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 30 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 32 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 34 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 36 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 38 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 40 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 42 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 44 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 46 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 48 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 50 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 52 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 54 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 56 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 58 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 60 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 62 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 64 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 66 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 68 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 70 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 72 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 74 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 76 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 78 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 80 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 82 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 84 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 86 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 88 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 90 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 92 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 94 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 96 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 98 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 100 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 102 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 104 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 106 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 108 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 110 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 112 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 114 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 116 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 118 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 120 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 122 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 124 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 126 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 128 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 130 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 132 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 134 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 136 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 138 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 140 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 142 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 144 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 146 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 148 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 150 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 152 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 154 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 156 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 158 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 160 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 162 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 164 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 166 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 168 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 170 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 172 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 174 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 176 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 178 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 180 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 182 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 184 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 186 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 188 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 190 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 192 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 194 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 196 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 198 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 200 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 202 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 204 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 206 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 208 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 210 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 212 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 214 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 216 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 218 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 220 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 222 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 224 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 226 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 228 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 230 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 232 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 234 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 236 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 238 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 240 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 242 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 244 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 246 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 248 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 250 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 252 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_calibration_scale", |
|
"shape": [ |
|
1 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2, |
|
"byteOffset": 254 |
|
} |
|
], |
|
"md5sum": "3dd53907af219ff4adc62aa62c8700cf" |
|
} |
|
] |
|
} |