{ "metadata": { "ParamSize": 98, "ParamBytes": 4943257600.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "2097305f352cca56c3dc0ad1edb5bcbf" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "800df08c617c74598d4ca6af76553de0" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "eb3b214c8e085725718f03cf804164ae" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6a9a2411562a216b805451d914163a8c" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "32b4b83389df3b212ef853f8f19fc2d1" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 20987904, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4096 }, { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 8192 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12591104 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20979712 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20983808 } ], "md5sum": "35b8ff220ef5e2e6627dcfb621fb832d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9d8db30764216ea077a052dcd20fb2c4" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0856245486dd0d049581359e670f33e0" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "af71427b909126ccc9a9635a8279e0a9" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "795af0ca2c531da387572abece860a7e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4c8d8ed56e53eb99608130db0c1de05d" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "f7628a591cb04c54546aaaa4f9398a68" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "64f4c86756397349e929cd1f2f849e5c" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ea4ff7cec9e3c5ec3c805a6586bdd2ee" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "90f829661b428a1095ad0216de8f525e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fb6f9e666f95e8149da896c79bbb730c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "097a18cf907839e45e7bceedcfeee09c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "8c3f48c9a1fb32d02410baeb954f1bb6" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ab16992964f5135c087f55a6c96eaf30" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "75e6fc44ba742deec3aea8e1bd310f22" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "a2c7db5bc8440c57f50b3c38a403c9f8" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3fe8a1e2c9e49e659f3411c7e138cd9f" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6929ff78c04aaf58088e2b0fd33c61c4" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "50aa881a0f07e3bf087ec22fd3c2c7f7" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5814d5a86c183a63f441e473b96eac9f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "66938847fe03b3b7beb0c34e0ea3d146" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "60e2dec95a7f652b11879a58814a0b72" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "daca4f0d1145ceba60aba195b6e4b717" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a7d140e2aa8117b4e0251ab7d4f1e894" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "aa3290f5cbda1c5d43f00d0846fb3c5d" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6f6f0eafe2557a8793292023a93fc44d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ac207948c6dd9f8c618249b5c8370101" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "5fd4c85aebd5f940fb7cb90a06bc0e3d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "385fbfd45620a6a7c7d555b8cf7d35ff" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9d34ce6dc3d3bcecc868378358d66bcb" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "24b869dad75eb377412439976e3ae012" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0168cb04de60c94a82da930ef69dfc79" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f38fe5a46d483f933a79ce6577bd5236" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "49401c003986778a26ab6e1d17dea5a9" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c9edef3101ccb32cf23b9342315b1a38" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c7b95c82540aad932bca9ebac8caa96e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "e44904f0c432ad45b9919a22b590ae9a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3e337579a791f65389f4a5a93dc95633" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "79c0f7d1fa6519b778d30ef952522121" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "5a57388997fefe29f3788be628569150" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e2d663fbf063a76d8845a1117834610f" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "50e4119a748b23e67bfa10d1ec8e1f00" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "c7f736b657c13231c2c106aab8827f06" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 20975616, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 } ], "md5sum": "2484e69deb5afeb91287322eac29c4f8" } ] }