diff --git a/ndarray-cache-b16.json b/ndarray-cache-b16.json new file mode 100644 index 0000000000000000000000000000000000000000..b651729acc9e3584fdcdee73c9606390d079d5c5 --- /dev/null +++ b/ndarray-cache-b16.json @@ -0,0 +1,1446 @@ +{ + "metadata": { + "ParamSize": 98, + "ParamBytes": 4943257600.0, + "BitsPerParam": 32.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 525336576, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 128256, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 525336576, + "byteOffset": 0 + } + ], + "md5sum": "2097305f352cca56c3dc0ad1edb5bcbf" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "800df08c617c74598d4ca6af76553de0" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "eb3b214c8e085725718f03cf804164ae" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6a9a2411562a216b805451d914163a8c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "32b4b83389df3b212ef853f8f19fc2d1" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 20987904, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 4096 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 8192 + }, + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12591104 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20979712 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20983808 + } + ], + "md5sum": "35b8ff220ef5e2e6627dcfb621fb832d" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9d8db30764216ea077a052dcd20fb2c4" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0856245486dd0d049581359e670f33e0" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "af71427b909126ccc9a9635a8279e0a9" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "795af0ca2c531da387572abece860a7e" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "4c8d8ed56e53eb99608130db0c1de05d" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "f7628a591cb04c54546aaaa4f9398a68" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "64f4c86756397349e929cd1f2f849e5c" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "ea4ff7cec9e3c5ec3c805a6586bdd2ee" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "90f829661b428a1095ad0216de8f525e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fb6f9e666f95e8149da896c79bbb730c" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "097a18cf907839e45e7bceedcfeee09c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "8c3f48c9a1fb32d02410baeb954f1bb6" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ab16992964f5135c087f55a6c96eaf30" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "75e6fc44ba742deec3aea8e1bd310f22" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "a2c7db5bc8440c57f50b3c38a403c9f8" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3fe8a1e2c9e49e659f3411c7e138cd9f" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "6929ff78c04aaf58088e2b0fd33c61c4" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "50aa881a0f07e3bf087ec22fd3c2c7f7" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5814d5a86c183a63f441e473b96eac9f" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "66938847fe03b3b7beb0c34e0ea3d146" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "60e2dec95a7f652b11879a58814a0b72" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "daca4f0d1145ceba60aba195b6e4b717" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "a7d140e2aa8117b4e0251ab7d4f1e894" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "aa3290f5cbda1c5d43f00d0846fb3c5d" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6f6f0eafe2557a8793292023a93fc44d" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "ac207948c6dd9f8c618249b5c8370101" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "5fd4c85aebd5f940fb7cb90a06bc0e3d" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "385fbfd45620a6a7c7d555b8cf7d35ff" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9d34ce6dc3d3bcecc868378358d66bcb" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "24b869dad75eb377412439976e3ae012" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0168cb04de60c94a82da930ef69dfc79" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "f38fe5a46d483f933a79ce6577bd5236" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "49401c003986778a26ab6e1d17dea5a9" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c9edef3101ccb32cf23b9342315b1a38" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c7b95c82540aad932bca9ebac8caa96e" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "e44904f0c432ad45b9919a22b590ae9a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3e337579a791f65389f4a5a93dc95633" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "79c0f7d1fa6519b778d30ef952522121" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "5a57388997fefe29f3788be628569150" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e2d663fbf063a76d8845a1117834610f" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "50e4119a748b23e67bfa10d1ec8e1f00" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "c7f736b657c13231c2c106aab8827f06" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 20975616, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.norm.weight", + "shape": [ + 2048 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4096, + "byteOffset": 20971520 + } + ], + "md5sum": "2484e69deb5afeb91287322eac29c4f8" + } + ] +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..f6a2fee79c403ac95e9b949516b61b2f06665aee --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,1446 @@ +{ + "metadata": { + "ParamSize": 98, + "ParamBytes": 4943257600.0, + "BitsPerParam": 32.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 525336576, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 128256, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 525336576, + "byteOffset": 0 + } + ], + "md5sum": "2097305f352cca56c3dc0ad1edb5bcbf" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "800df08c617c74598d4ca6af76553de0" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "eb3b214c8e085725718f03cf804164ae" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6a9a2411562a216b805451d914163a8c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "32b4b83389df3b212ef853f8f19fc2d1" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 20987904, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 4096 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 8192 + }, + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12591104 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20979712 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20983808 + } + ], + "md5sum": "35b8ff220ef5e2e6627dcfb621fb832d" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9d8db30764216ea077a052dcd20fb2c4" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0856245486dd0d049581359e670f33e0" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "af71427b909126ccc9a9635a8279e0a9" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "795af0ca2c531da387572abece860a7e" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "4c8d8ed56e53eb99608130db0c1de05d" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "f7628a591cb04c54546aaaa4f9398a68" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "64f4c86756397349e929cd1f2f849e5c" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "ea4ff7cec9e3c5ec3c805a6586bdd2ee" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "90f829661b428a1095ad0216de8f525e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fb6f9e666f95e8149da896c79bbb730c" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "097a18cf907839e45e7bceedcfeee09c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "8c3f48c9a1fb32d02410baeb954f1bb6" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ab16992964f5135c087f55a6c96eaf30" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "75e6fc44ba742deec3aea8e1bd310f22" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "a2c7db5bc8440c57f50b3c38a403c9f8" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3fe8a1e2c9e49e659f3411c7e138cd9f" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "6929ff78c04aaf58088e2b0fd33c61c4" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "50aa881a0f07e3bf087ec22fd3c2c7f7" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5814d5a86c183a63f441e473b96eac9f" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "66938847fe03b3b7beb0c34e0ea3d146" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "60e2dec95a7f652b11879a58814a0b72" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "daca4f0d1145ceba60aba195b6e4b717" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "a7d140e2aa8117b4e0251ab7d4f1e894" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "aa3290f5cbda1c5d43f00d0846fb3c5d" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6f6f0eafe2557a8793292023a93fc44d" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "ac207948c6dd9f8c618249b5c8370101" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "5fd4c85aebd5f940fb7cb90a06bc0e3d" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "385fbfd45620a6a7c7d555b8cf7d35ff" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9d34ce6dc3d3bcecc868378358d66bcb" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "24b869dad75eb377412439976e3ae012" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0168cb04de60c94a82da930ef69dfc79" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "f38fe5a46d483f933a79ce6577bd5236" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "49401c003986778a26ab6e1d17dea5a9" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c9edef3101ccb32cf23b9342315b1a38" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c7b95c82540aad932bca9ebac8caa96e" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "e44904f0c432ad45b9919a22b590ae9a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3e337579a791f65389f4a5a93dc95633" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "79c0f7d1fa6519b778d30ef952522121" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "5a57388997fefe29f3788be628569150" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e2d663fbf063a76d8845a1117834610f" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "50e4119a748b23e67bfa10d1ec8e1f00" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 20979712, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20975616 + } + ], + "md5sum": "c7f736b657c13231c2c106aab8827f06" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 20975616, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.norm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20971520 + } + ], + "md5sum": "2484e69deb5afeb91287322eac29c4f8" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbdc422d988f056302312aaf25d5eeda74a36d46 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b111063841df6851fa28395ca43f6570c81486a16d4830d0026cb62cf14a7b5d +size 525336576 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c24cc269e157ac9d9098f4fe2bc47772f85cadc --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2751b067ffdfc7c9b3501307f9b0dedf655c2a2ced5c13e67a27b02a497dbb30 +size 33554432 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..1709ee1525e535e5ef0511ff9c750c01c1e8e29c --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fec79d8e298aac768aaeaa71871877fe60291bcc14c236c56c345bbab5a00b2 +size 67108864 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..909cad613cb62100656167e06f6663de74141e51 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f2f6e8a15dd3ef655d7f76d3ef58cd208b5f34058ffe88f43242ddc8cbe955 +size 20979712 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d262a746df3f3878ae21c1c3d88070d52fac5c6 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9eca1a84eb57c68fb3c4b33a1c5f6688a8605c31bd42dd1af5c04063cbfb378 +size 33554432 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..2f3a1b53f0670922f9a5b6a2d28c94f38c558392 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0d12905ad47315b08849029622d665c60e97396f35b5307c25079c6afefe5b +size 67108864 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e1cf7a7c7768633ed0b658bd76e0ef8f2f3c7bf --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3e0f88920cc5b66aa7b0fa5ba87d51e1bbdb21de6c6227f93e0a5a87d9070f +size 20979712 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..b0834e787f4c3a34acf8ef9291ef282f13cefd4c --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eda51a024f20c599fbf8dd0041a16419676661415019a4a050722b56dc82a9e +size 33554432 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..2311756635dc9e4a92782af85a7a6f6a0ff77c47 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd082bad03d500d58887307c0f8eba7036173bcfc6cd0b0acd2ad4023231e04 +size 67108864 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..77c1c2dab6fc3c6d20320df0f0b60cc5640156a9 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d66d0cc6d0128c6e83b7bff692922161ddb1350d0f467c09ffd024890d26421 +size 20979712 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b72f9bf17b1e41396d407e5a92cf1c21a1dfc40 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc01f83069720ca78330773cc349bd8bd9ce27511a594f0a874ad81dd1892b5c +size 33554432 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb440b9dbf3f1ad09ebeb786e92adb7bc2330620 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:619592501a586e71828897bba1dd0447f675ffece9aecf135fb317d40bbf5728 +size 67108864 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..77f3cf55828cd3774a4b558ef22993d6f2896e05 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76f4c8e627a778e1ffa7369a3a32981e90f2a104319753aefac4c7565c6fa731 +size 67108864 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b291f76dd8350b53b270e1837659081049213e6 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d958501265d40f9ef4cd9ae95c472d2fe74fe0695e7173f8d7369d58cf04683e +size 20979712 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..04ced409e93426701d2ede3fca4bb18b6bf4c039 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:284ca029330d3ce6413f8ecd0c38dd6bd0dd0077d2facf182bbfe2074827e72a +size 33554432 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1dd393618ddaa9711caa2c1dc23c3c6753c5aa2 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d69ffac141f44459044e61873d16f8d9fff64f978e4ef06cc7a62dee0e891c +size 67108864 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3a64320fcfa464deba20176a55b62d7b7c339c3 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8ffd629d0ed973ae01b4f97d87cecb48ccfebf7ae94f8ec171b51d22046fb7a +size 20979712 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b1ffbf5ed1c255c7b22f677d49d0c21979b2d3c --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c322e8f8a3641339375d21dfa0e52b6bec3976f5a8ede49f200dec8bc38149 +size 33554432 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe0d32f01bd58ebda7e49eabe32050a7e934a04e --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b68611f11765f27e65c60bc5a4391b9e7bffbd62da6a0039c01f8d7e39ee210 +size 67108864 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..798b8cb1cc769df84e15171f8cb793b5de5f97b6 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23673cab99976fa38cee9b9bc53c9f1d40b5e9686ad21d9b32a7827f3dfccb59 +size 20979712 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee5f1c5932e3144b10f1f57d07ce84e98c55c072 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87981c1be8b95a602ed901fa3fc525b4ba83e2e1ef4769b9f382073b408e17f7 +size 33554432 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..00776715b7f5c30d1fc7ecd5e77ef84f0e179427 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712c6f7d3358449a33d0c42f5563a7df91315bb1c8208392722eb8d329e9bd81 +size 67108864 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..9968a17f569fd859119a17abd5086148e788ffe3 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f85f4f5c404a1548aac2a8b6589c26e79e6a70550411b0ad6ee14822122ecc5 +size 20979712 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..29891c883fa82bffc9415e70d5e13990a471dcce --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a813f0b5f69255660f70c184476fe1e07380eae849ebb82ef774d923b2ec1bc +size 33554432 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..01fe003aa518b59f72b0773b9e28ee995e064061 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40fe993e7ed4a0b51d71b02bf8b16761ce56b36f232f14e3c31c394f1c0804e +size 33554432 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..5385d6aae3272f951b26001ee969dbf0924a1bbb --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b762587add02e7b63f8ffab05a46f30d3cb3fa04710db88f0894a78ce192abb8 +size 67108864 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..b15de79aaa99fc1fdc7b2d7add6dff14762bdf33 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f869262e90c2e4ed78d52bb227b57bb6731a3c85b1b52c2f2dbb42aaf6be21c +size 20979712 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..c255ef45723f414b697343932738df531e161704 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac72c3b0d2484e25de526c0869435f2581722547ca2ff19526a9f28ec06a2c4 +size 33554432 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..a4d8d609d564d064dc8e9424c8e0affb941477c4 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf1039cd6f774e0d4a9ff4111a2abce43c46bf07b1f7c472a44d4f780c23663 +size 67108864 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b11e35347a58531171f233c1e0197d806552700 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b98d5d31457778c761b58f658a49afb0076be3f7ecf55aeceec3fae7110da0c +size 20979712 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2e8a787df9a8d2eb5679830d0e20d84935ed533 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21d3162b619ec7e50359470bca94bb1a9900866179068ef7c0b6d577afd24c69 +size 33554432 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb2db927357794cd9a94029d8b1cb33017ddc8de --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b154094b220e60f814e0892f52004fd27d05fe46af23e4483fe70d7fff31ae1 +size 67108864 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..fff8add51aa8cf5ff19164f01eec9ded88b2a419 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0b88b362fac5b32e9609c0ab26197277b118a43a61bac5210045317a3e9a5e +size 20979712 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..4977565f39bbce65e9b84fb4c53263a168f70404 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c35432c958b20627cb04fa50b56429affc65085baebc8ba3603af31bb928f2 +size 33554432 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a6da8b892fe5198e129e7ed209daf1c66a1e466 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bb6ade0e13361d86784563d721c7141e58f9b17cbd5df261de1a0e861a16b7a +size 67108864 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..88d8b4e7f79b1307c7f09f3c417335d8f5902aab --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:185db985b99858d6f3beeace72f38e639528e0b943a8a1cd81b101d3b3cae471 +size 67108864 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fb4898642e579585eee4967911502e0e4a3dc66 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b74abe9959a32f904b1156963e46ae08e33f317055c2653d5a639bc4d2959e79 +size 20979712 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..430821c2c8a3895fbfb7ca018975b90a4b2edb02 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f665309b6599288f471acd10c5c6d109799dfa1087693b549fdf76fdf735843c +size 33554432 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..a302d2439a2622a2954814010201edb7b28ff20f --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0c4cc3b469adfe6521251ec0afdfbed7fc1981b9b80a6bbb8f6eed7f04795da +size 67108864 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..d82b6f22abbea3665ca60270a91f0d9eab63b641 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d36b1e0c801344cfcaefa2f98ce5e9187b17d4ec42b042d924eed4ceaaeea72b +size 20979712 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..d02f9551337423374fd51877657c2c3bcdccdce2 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1607dace1e9c113055b5d232141dd8938d84fc11c9e82ac50ab68830504f5da7 +size 33554432 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3fa3c577c1886ed0d564a648a229fd754703d8c --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678cc0c4b0deccc816101952e4129511543cefb40a47af72a34cf7d34279c16d +size 67108864 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..373b650da6555aa815652372bd098e02f6da5ac1 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedef3e2ff9d4c9778dda120ab80c82ab7fb09a360ad33b648d1cf98563aecf1 +size 20979712 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f7bab7798891c9d545668e28b440b06b3d12aed --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2cf0d445902f6dfcff13a47076c2f908adc1b2b99b276cd1eed41ec6529c76 +size 20975616 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc8cefb98a933ca800a374f29a12df24ac95576d --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c5a7920cfcc502e7e7c8faf63c7a74826a9a6636df11a703c86f700e7ac0c9 +size 20987904 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..05664d9ffd0f8d99817837aaac4ddf5565922837 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39d89336af2cf06d717decc432cdf30706c798f77af5699a5de78ff08910091b +size 33554432 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..04afcbd7d8ec35e08f15712b63b2f9ca5eed0067 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155426c2069b8426af85978fb0ae63d6e6e09969144fe2758966d984abd42a51 +size 67108864 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..08185880d6b50d4a5f57f91953c0cab3d1057109 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97453c83cc7bc80bb0e705aa8134d91cad229e2528e3675161b9cf02cd2bf243 +size 20979712 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4c7df7a69497a7bbfbb7a111162e2a4901b4bf7 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb54369e3d85e902166d906f03bc47270ed387ae92c8fbeb714f7dd49f27af8f +size 33554432