diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a89021da9e81444a1d52517e07d6e016aac60ecc --- /dev/null +++ b/config.json @@ -0,0 +1,34 @@ +{ + "_name_or_path": "./hydra", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 7168, + "initializer_range": 0.02, + "intermediate_size": 20480, + "max_position_embeddings": 200000, + "model_type": "mixtral", + "num_attention_heads": 56, + "num_experts_per_tok": 2, + "num_hidden_layers": 60, + "num_key_value_heads": 8, + "num_local_experts": 4, + "output_router_logits": false, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "vocab_size": 64000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99ab23e2dac50cc0783f4d534072bc34e29dd7a2 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.37.2" +} diff --git a/model-00001-of-00097.safetensors b/model-00001-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d41e2c09c6c96e09ade14c9311bdddf520cc5393 --- /dev/null +++ b/model-00001-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d117e5879d396c48913485af01eba3d2985f4d2b0a735e5c33f2f9d023c97e48 +size 4653696216 diff --git a/model-00002-of-00097.safetensors b/model-00002-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..849a26e786af7cc82b9b1f7d47164aa9660d33b9 --- /dev/null +++ b/model-00002-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c187c0037c4473881c2d59e4fbec5f33f1b22d9aeb1dc0fb0df8442fd1c871da +size 4961920624 diff --git a/model-00003-of-00097.safetensors b/model-00003-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18d378d091c49245db4d62f867f7af7978237c24 --- /dev/null +++ b/model-00003-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df80ae0218de635d3b4afebc2abae278f50e603c627b34aa50e65730f3b67d30 +size 4903257368 diff --git a/model-00004-of-00097.safetensors b/model-00004-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7cae4657fa894175538f8e3c5961a79c0b46b773 --- /dev/null +++ b/model-00004-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7845e06b7c3bde58db05f4bdec03b1440111fd5ad952f37d6ee1f51fa386a13 +size 4580353752 diff --git a/model-00005-of-00097.safetensors b/model-00005-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e42f2e97d6d672098285c98e113531722edc9eeb --- /dev/null +++ b/model-00005-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c9dde426697cf498efe2dbce4871e4a30ec834c586c95da0453ec1484f4701c +size 4697621552 diff --git a/model-00006-of-00097.safetensors b/model-00006-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b0f74463ed1f1216d8fbd23331c5cc677f2701a --- /dev/null +++ b/model-00006-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f1dee4d19153effcbd911292ede13d395b2b19f607574d77d65e95991d0fb8 +size 4580353752 diff --git a/model-00007-of-00097.safetensors b/model-00007-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8eb86f7f2592e0d79e0412577a1f4fecfe43edaf --- /dev/null +++ b/model-00007-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353c3d533235be79cdb2573b6a6f955fc37ad5bb9d21523c830e59e84ddbd673 +size 4580353752 diff --git a/model-00008-of-00097.safetensors b/model-00008-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..921a747a81318b84d267734338dd65c5116d68c6 --- /dev/null +++ b/model-00008-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11b713e442ca8d2532071ce23b999f6784699af013a580475019b82cefdff33 +size 4697621552 diff --git a/model-00009-of-00097.safetensors b/model-00009-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14c8b2d365643d1daacdb75a03e35cde2b694f62 --- /dev/null +++ b/model-00009-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e29c64853bdb5556dbe4108e096ab93c416b9ecd5d694f357e274ddbab7b7aff +size 4580353752 diff --git a/model-00010-of-00097.safetensors b/model-00010-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a04f50c9bc08ed9c4c25d357490397316aa1d768 --- /dev/null +++ b/model-00010-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a95dab034af983da2cc85ea8ba019b5f6bb15c10d3f2ea5a3496e44260425c2e +size 4961920624 diff --git a/model-00011-of-00097.safetensors b/model-00011-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63486225d87416b04cb9d6725fd90cb0569df617 --- /dev/null +++ b/model-00011-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16f26f7cbe76195c1e9f9df87b5ef41b1d0febe73d6135025aa17112c5675e4 +size 4903257368 diff --git a/model-00012-of-00097.safetensors b/model-00012-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..448adfcb1ce6281207eebd9f6cd1135f98f6285c --- /dev/null +++ b/model-00012-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d58be7dc9bd5d420e0e59cda74e277e9d78f7f038cf33787a5dd0b09fba32e1 +size 4580353752 diff --git a/model-00013-of-00097.safetensors b/model-00013-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c36f9715436bc03c185f1b517765976ee7d8dc3 --- /dev/null +++ b/model-00013-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29968ffa2618c8bbfc742826192d2d0975afdb19dd2b00f55c612ebfcfefa8ca +size 4697621552 diff --git a/model-00014-of-00097.safetensors b/model-00014-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ad7c2b126fd8f8d543bc6c5ca799bfeb0be44a4 --- /dev/null +++ b/model-00014-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e0f44eed713e413a83ea3b24f28cbf864f88a41f751ed4d4bce113140539e4 +size 4580353752 diff --git a/model-00015-of-00097.safetensors b/model-00015-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..602266bbd992448015e3f30e4da53ea341389208 --- /dev/null +++ b/model-00015-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50c1ef7e6e0760866e41132062b348be108e851e4620a00c5df356c7994bc4ad +size 4580353752 diff --git a/model-00016-of-00097.safetensors b/model-00016-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7131b2d36053091ca70566eae7cf218fc07f940a --- /dev/null +++ b/model-00016-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d4d5e48f2ec24d260c19c0c8ab1f0ee6eb125903509a4e4cee7863f0cd51e59 +size 4697621552 diff --git a/model-00017-of-00097.safetensors b/model-00017-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe28bbfd43b97bf48b2a71d679b0f8bc4e0a60c5 --- /dev/null +++ b/model-00017-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f4606e8052d85cc25eaa8ae5892faf457a124fe23e0af03f8140453ef5390e +size 4580353760 diff --git a/model-00018-of-00097.safetensors b/model-00018-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..840e7f287f377df10862fdf543e4b8683381c399 --- /dev/null +++ b/model-00018-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0370859f69e87e71b3a32df6904677817004e1c135b7ae4185303145dc7b723 +size 4961920640 diff --git a/model-00019-of-00097.safetensors b/model-00019-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d021f44d7e9acd1b71b6dacaa41ac22d487ebc6e --- /dev/null +++ b/model-00019-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d29eec6483b2296297643ec82a4ab4e4958234c601d25827b0dc655920db918 +size 4903257384 diff --git a/model-00020-of-00097.safetensors b/model-00020-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a33a130becbcb1785a38c5615398e2f61e45be82 --- /dev/null +++ b/model-00020-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982ea8472eca941978ede4b3beaa37ef656cff10b688937f82f464ef83a32e8c +size 4580353768 diff --git a/model-00021-of-00097.safetensors b/model-00021-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a511c66891d0e6a058811798a61b0182fe812c9 --- /dev/null +++ b/model-00021-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a3dc599f71e93c0932f3fa07f633aa53984a8e00ccc93bc39781fb235fa73e +size 4697621560 diff --git a/model-00022-of-00097.safetensors b/model-00022-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f651c09a02d605f48371820517d45a53b1f5cddf --- /dev/null +++ b/model-00022-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44611a57738045b0002cc21c605f2dbb551ba0113b2e25d38c61f21310746249 +size 4580353760 diff --git a/model-00023-of-00097.safetensors b/model-00023-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..273c2a1334f2e41481cb15b425519f484af602d3 --- /dev/null +++ b/model-00023-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4708648589672bedb8ad090e58660465a53f253768449974773aa6e0cc0dec91 +size 4580353768 diff --git a/model-00024-of-00097.safetensors b/model-00024-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22cb596343f879096b8bab002f8b4437c8c6eeb7 --- /dev/null +++ b/model-00024-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6010cc86f4ccd7fa976ff5e22fb82cd683121fa82790b98a41d8733e080c8bcc +size 4697621560 diff --git a/model-00025-of-00097.safetensors b/model-00025-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68c629f0ac2e94121575100e25074614e0b7cf5c --- /dev/null +++ b/model-00025-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d7950fb10f239905eac778c7ee8b0eb04d8607a7937e0a7d4da4365242e8167 +size 4580353768 diff --git a/model-00026-of-00097.safetensors b/model-00026-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3ba8a5696269db35307b25f4aab33e84f269357 --- /dev/null +++ b/model-00026-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01bb0db0e48759eb12a41073ce6fb6f208f8970f8708000702532b0b7627b598 +size 4961920640 diff --git a/model-00027-of-00097.safetensors b/model-00027-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d541347d930fc697def74468bcefb36d8ca8e133 --- /dev/null +++ b/model-00027-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df789d4d89bf412f8915c6c566a1dfac3611ba4b3891c420f679e4b165db400a +size 4903257384 diff --git a/model-00028-of-00097.safetensors b/model-00028-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..becbbef4b2d7b38a8a96ba71f14d604460d0c264 --- /dev/null +++ b/model-00028-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3ccf04055d41a5be0e6a023d1cfc7200113c195accbd84e63eba53417312db +size 4580353768 diff --git a/model-00029-of-00097.safetensors b/model-00029-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..404a43d4650be551919d73546cf2e67afd89153a --- /dev/null +++ b/model-00029-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e1f12f9c6f1a700cce7688d00fc495cf32f314ac03c63f668dcdb232b9da7bb +size 4697621560 diff --git a/model-00030-of-00097.safetensors b/model-00030-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89881dbddccdfd29e5b00b9468040fee7f7e67ea --- /dev/null +++ b/model-00030-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4e0efe8ddeb1f48241e2b0ed04d2569f68106bfb6adadc5ca2b5d0160a68e9 +size 4580353760 diff --git a/model-00031-of-00097.safetensors b/model-00031-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6e20a2fe3e06bb041ef4fc83be0f5030d868d97 --- /dev/null +++ b/model-00031-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:914c8cf16cd88cc697e6593d6830e7a4875b9f96d1ceeef37b82dbc8564fee97 +size 4580353768 diff --git a/model-00032-of-00097.safetensors b/model-00032-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfccefe7e286de2c956ce21918f48896476f8495 --- /dev/null +++ b/model-00032-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e2e9a15e94ce95d0358c314bc9de11bef393730d661c56dd7a1ca5eae4e682 +size 4697621560 diff --git a/model-00033-of-00097.safetensors b/model-00033-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d73ac123c73359741551ad3a20a0635e89b38604 --- /dev/null +++ b/model-00033-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08df07f2b16e3b996a6f9eccd2783fd613b7139a1a8f0651d6af64480827329e +size 4580353768 diff --git a/model-00034-of-00097.safetensors b/model-00034-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a87a0fa2997862bc9457667a9c42ec028439c690 --- /dev/null +++ b/model-00034-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b3439d5a8b3f95dab12dec409616c5f48736c05345b81df5e8d9ea7b22fe1e +size 4961920640 diff --git a/model-00035-of-00097.safetensors b/model-00035-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28bfffe777bef2c27892fa2cf7f8320cc125af77 --- /dev/null +++ b/model-00035-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13fee5f1e157232ed56d5f2320494fcdc434e076b703e5fd8eb4c5794ac1e3c8 +size 4903257384 diff --git a/model-00036-of-00097.safetensors b/model-00036-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd00a812829e3897617fe2c6a5bf52ce934f2cf6 --- /dev/null +++ b/model-00036-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd0ae375bd54a079a397a0319abc0936b24ef11d3463f8f31e93cb9ca6f1d4c +size 4580353768 diff --git a/model-00037-of-00097.safetensors b/model-00037-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..394cfb9fc9e9789939d2879bbd9d94ce33c02e41 --- /dev/null +++ b/model-00037-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ea76ad3a2f40adfe9b7021afdf6879726545fa3b1806937ee6768915964fe7 +size 4697621560 diff --git a/model-00038-of-00097.safetensors b/model-00038-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa63b45b8534c1e2d7a4fb21429d9ecfb5b0acd3 --- /dev/null +++ b/model-00038-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea650fd0198435cc5e2010caa9af3d9de15bd4060015eba0a399107b0df79edd +size 4580353760 diff --git a/model-00039-of-00097.safetensors b/model-00039-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7003fd20b0e8b06f276f4c05b75be6ae5c667a70 --- /dev/null +++ b/model-00039-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae40622327bf47cd2d2e84a9f46bcf4944285a4f95c4823ab62eaa69a12bd81b +size 4580353768 diff --git a/model-00040-of-00097.safetensors b/model-00040-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31a9b1c1d6e7afe9637bfde48fe78fc3e5c6734c --- /dev/null +++ b/model-00040-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8dc381dbd9802b9c10ef3ea5381b6102ee46f10a03e6bbefbfebd27cc84ff6 +size 4697621560 diff --git a/model-00041-of-00097.safetensors b/model-00041-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85d6ec6e47b38760c65b12a973a9f0dd1eea8d74 --- /dev/null +++ b/model-00041-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0351cd7bf9042091ada09c990e8c83b3e522e3b3dc31f5b22d3c7cb7d9b9d15 +size 4580353768 diff --git a/model-00042-of-00097.safetensors b/model-00042-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..331c34d3ee7012c301cf5fec3560831be9bec856 --- /dev/null +++ b/model-00042-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1310b5e2ac89e4706f0f4713eaa25032b3986efe0187fc5093f415c44911b885 +size 4961920640 diff --git a/model-00043-of-00097.safetensors b/model-00043-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65035683db82b5dc7ce8c558800e8e939f322e29 --- /dev/null +++ b/model-00043-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342075ad6da780d92d60b7cede87d46624376dd8d652865fa0df1ceed868c718 +size 4903257384 diff --git a/model-00044-of-00097.safetensors b/model-00044-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b3a229609c7fda2bdb6ad55f5707d090586dc9d --- /dev/null +++ b/model-00044-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f634c591af16768bd83a48e9e8cfb98dfb989d94e9810659217887a7019354b4 +size 4580353768 diff --git a/model-00045-of-00097.safetensors b/model-00045-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17081db247d7bed3b178e02ce27733804433a3aa --- /dev/null +++ b/model-00045-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cffc69eff23e642a0d32a6cb94f87d866a417b30b8839e8ce3c6c5e38707bb09 +size 4697621560 diff --git a/model-00046-of-00097.safetensors b/model-00046-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a229730a618c9b9351c2ea69dde9359ed1ebe68e --- /dev/null +++ b/model-00046-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f03e395465aa780fee2bfb90058de4b6dfbf142994426e7dcf9ef1378a3c81 +size 4580353760 diff --git a/model-00047-of-00097.safetensors b/model-00047-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..718a2d36bb503a1a8caede3af644ec3258f17b8d --- /dev/null +++ b/model-00047-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7278047be5f349a8e57f2778b09afdd95c514f06c82489d49b3795e4392bf6a +size 4580353768 diff --git a/model-00048-of-00097.safetensors b/model-00048-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..204e1b9214c32ead159c889a740272d7bbd0013f --- /dev/null +++ b/model-00048-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57292da99f26d5d73bb99cb915010b0a70b568d49d9c14d00466cf77c131cfbb +size 4697621560 diff --git a/model-00049-of-00097.safetensors b/model-00049-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43db99c05f7dcf6aecab3e61a1714b051684a71d --- /dev/null +++ b/model-00049-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:816461bb07bbd1261836337f95a092aced48b8bc293877e28026d81bd1df530c +size 4580353768 diff --git a/model-00050-of-00097.safetensors b/model-00050-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82cc9684ddca69751c858845e316e5ba7000f8bb --- /dev/null +++ b/model-00050-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca349c26450e9b684e1c7e9a563cccdee633364150e6a82dae234e4af65b5dda +size 4961920640 diff --git a/model-00051-of-00097.safetensors b/model-00051-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4b7088932af94225924218699050557122e6546 --- /dev/null +++ b/model-00051-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d3fe452f8fcd63e471ed7c1ffa984461d9e9e564e267957f3e193a9995d9f6 +size 4903257384 diff --git a/model-00052-of-00097.safetensors b/model-00052-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45738cb8a7186dbf1a1a2294f8d5e14513bbf688 --- /dev/null +++ b/model-00052-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6db903f3a409bffb1cab55b7acf0a486636e24a1be2bde6bcecee2a5a0c997e8 +size 4580353768 diff --git a/model-00053-of-00097.safetensors b/model-00053-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65a1c5cbae95c39e03909d3dd369b8d62936a762 --- /dev/null +++ b/model-00053-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd5f408e05c361675038fba6b0e5538315e0b86c757e3581f842050e3c0de43 +size 4697621560 diff --git a/model-00054-of-00097.safetensors b/model-00054-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94535fe2253947f47f19896cfb39930da44fe689 --- /dev/null +++ b/model-00054-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a282ca770fdd56304d10b10332b064c5b5e0803463716233407b28ebb6b52f +size 4580353760 diff --git a/model-00055-of-00097.safetensors b/model-00055-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..919df770bb5e94ff163e061e58a9603a8e18c985 --- /dev/null +++ b/model-00055-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cb7dcf88658e39403af1fff4863e248777c9b0c23698862360b40d06a9090e2 +size 4580353768 diff --git a/model-00056-of-00097.safetensors b/model-00056-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..113eaacbb0c15dc4803f0e7d335f286b8cfb731e --- /dev/null +++ b/model-00056-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5644eb1d63e65d3ca6a17d8a03cf6470e4fcd778e6f965823496a553b46c68c6 +size 4697621560 diff --git a/model-00057-of-00097.safetensors b/model-00057-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a89eeefb945ce0f7b28791b3521a378dd32e83f --- /dev/null +++ b/model-00057-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b701205b91f2d10681f40b72b346df5d988e3e9f56cd68abfed3e7852fad6b81 +size 4580353768 diff --git a/model-00058-of-00097.safetensors b/model-00058-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d37f719e3ae2180a099ad32c909749cc2a816282 --- /dev/null +++ b/model-00058-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b284e729712f379601e9cfe2db69cda210bc9766c5fd02b809f30bb0bef8b171 +size 4961920640 diff --git a/model-00059-of-00097.safetensors b/model-00059-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee5f692fb158dc6101541825954342b06dd961ee --- /dev/null +++ b/model-00059-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39bef0f760006bb66abb59d2485c268bdb9da1ae894cc283640d815258763f31 +size 4903257384 diff --git a/model-00060-of-00097.safetensors b/model-00060-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7db5f8d7e246b9972ce8f623567fbb98e4859ec6 --- /dev/null +++ b/model-00060-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61bae2f0ce51feeb0ae393f0f98e3b0b3a19acd90d7e97d321173816257d28f4 +size 4580353768 diff --git a/model-00061-of-00097.safetensors b/model-00061-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33cdd9b5085765b17b06f118078b438c2a175d25 --- /dev/null +++ b/model-00061-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c2fddb39a765b9820260de7a9f130adaf14e71db63a9ccab66dc6964df1c7af +size 4697621560 diff --git a/model-00062-of-00097.safetensors b/model-00062-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f00b77327963751c6f3d980c8de2378c9c859d3b --- /dev/null +++ b/model-00062-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4489f37466417fc0e0bd2e776e843e72430d425234cbdaf7ed89d272b9e1b1f2 +size 4580353760 diff --git a/model-00063-of-00097.safetensors b/model-00063-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f30efcc06f42f431cb4be04bdae873ed6748897c --- /dev/null +++ b/model-00063-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:623538163a9ec5e9c3b4fe0f96fb9d8591a5f2082ba4b398833707d9857ed542 +size 4580353768 diff --git a/model-00064-of-00097.safetensors b/model-00064-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b8c8c1c0c3418a38d257fb38bfe3bb7dacde615 --- /dev/null +++ b/model-00064-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286eb98549b3db233c06d880e726670040812c21a2d7c25ef4a904368e22c988 +size 4697621560 diff --git a/model-00065-of-00097.safetensors b/model-00065-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..298e67534e7baaa061c4653322525ee79eec61af --- /dev/null +++ b/model-00065-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54cd6ab934075c952c38864c69afd03d09e9ac0c36773d9c3c762edab2e57ef3 +size 4580353768 diff --git a/model-00066-of-00097.safetensors b/model-00066-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a3a9c6a770923ee86dfbebd502bb7965bf446ba --- /dev/null +++ b/model-00066-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081454f516634c42a78a62af08d837c1c9ff86d58f6e3d397dc173e01ef7f137 +size 4961920640 diff --git a/model-00067-of-00097.safetensors b/model-00067-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61c3e879629e8e17ad193c19b6fb5a684decad6f --- /dev/null +++ b/model-00067-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb38fb360c7c93a713bfc15aa11f802282c8f7e02bd10073064b116472dbf43 +size 4903257384 diff --git a/model-00068-of-00097.safetensors b/model-00068-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fd60ea5a5b51fae303ae5193921db0a786c2217 --- /dev/null +++ b/model-00068-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df7ac3fc77f5be0edb5e15452971857e21a13e297d315a37ad1733271aa679c +size 4580353768 diff --git a/model-00069-of-00097.safetensors b/model-00069-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36be60a80698a897ad61964aa04f623868c2794d --- /dev/null +++ b/model-00069-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4761a729b6e6348a8745a1ccff138b45ad5be94af09e8c5e40ba2349e62542 +size 4697621560 diff --git a/model-00070-of-00097.safetensors b/model-00070-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27062f17fc4b567fc3fbc3fd4a568858d471e258 --- /dev/null +++ b/model-00070-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc8fe366e7841f3057021f650877d711593782950c90d096be9747b7e6f2791 +size 4580353760 diff --git a/model-00071-of-00097.safetensors b/model-00071-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9a4bd62c9d628f7480c89aa2e56a508a7fa2392 --- /dev/null +++ b/model-00071-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eaee6572fe4dd2881d78dd8c7df2b123760383d047036fd877c9f1f6f4247ce +size 4580353768 diff --git a/model-00072-of-00097.safetensors b/model-00072-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abaaab8282ba30d12995d1d4f2037a1bb623696e --- /dev/null +++ b/model-00072-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd854814d29e1f65c3dd214126a40365e94da6963defcf40c043869124bd7aa7 +size 4697621560 diff --git a/model-00073-of-00097.safetensors b/model-00073-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..719f9b9efc8dccd7062a658d6c0a48e078f76383 --- /dev/null +++ b/model-00073-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf3253422f26778cd2f200ab6975c1e07be871d060b8958262babab1530fd70 +size 4580353768 diff --git a/model-00074-of-00097.safetensors b/model-00074-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d230f2bbfa535d926e10b9b297e0b10fea91b270 --- /dev/null +++ b/model-00074-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bb8ae61dbfda36781d7b79d62bce90887a13a22c5e1d1e6d5d1cee668b90d54 +size 4961920640 diff --git a/model-00075-of-00097.safetensors b/model-00075-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d321025cc40f5d10e80bf84b339eaeb189f65300 --- /dev/null +++ b/model-00075-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a33468ed914ecc90ad724f977e30453655134e3a8f07004aed0ab3b6a0bd7c7f +size 4903257384 diff --git a/model-00076-of-00097.safetensors b/model-00076-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15e9013ca35d7d5b342cf2ea03a8fb6b282cf986 --- /dev/null +++ b/model-00076-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a00652938d91622d55f13f60e8d39ee2f7edf174e61d49061633a66a6ba7503 +size 4580353768 diff --git a/model-00077-of-00097.safetensors b/model-00077-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c1f758dd000508ff3d4938f349218eb84c9067a --- /dev/null +++ b/model-00077-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0e2ccb76227abc881696f1b795c081c81a6e9824bbc3130f7ce638fe6efb17b +size 4697621560 diff --git a/model-00078-of-00097.safetensors b/model-00078-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4199a94b8abfcea6a7cae00f395177d08eed826f --- /dev/null +++ b/model-00078-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a9d07653da462bc74162457d3e82be5648aa1693261405e19870ea4445bda8 +size 4580353760 diff --git a/model-00079-of-00097.safetensors b/model-00079-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c01b77d7157c5021938045b2fdccc1f56999984 --- /dev/null +++ b/model-00079-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4cbe2a785efef32e72921f1bc49158e3a8943029a71f1243f22bbef2785be1 +size 4580353768 diff --git a/model-00080-of-00097.safetensors b/model-00080-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc2f44370a76f3354e27380f93dbba991854069d --- /dev/null +++ b/model-00080-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e273edfcc7484e2809ace838b064eb17c184d2f8747c19d5babefa814dd8fb +size 4697621560 diff --git a/model-00081-of-00097.safetensors b/model-00081-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4acd5553b742cf94c235631021e348c8e76676c6 --- /dev/null +++ b/model-00081-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c677f3e4423d5af1151e781eda501a03a3ff17d070860f111e1141d0374806b9 +size 4580353768 diff --git a/model-00082-of-00097.safetensors b/model-00082-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d192841631d8feba014530e7a679120002ed749 --- /dev/null +++ b/model-00082-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ff1c69ddc9dcc82c6397284c3b813caced880ac2dc6c4d83fe31798ee165d2 +size 4961920640 diff --git a/model-00083-of-00097.safetensors b/model-00083-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a15028f9df6468101770e36f6f8a9156f687dd9 --- /dev/null +++ b/model-00083-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:431f2112259fce7a61aa69c743c2ef9c8005171c658658d2ead48ec91c6dde02 +size 4903257384 diff --git a/model-00084-of-00097.safetensors b/model-00084-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57dd97d3c92c3407160c3c0a323fa3401e3bcce9 --- /dev/null +++ b/model-00084-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53495d64064e312d84702a02eb434d10a335491b9423d569d2c81f7e93de5e61 +size 4580353768 diff --git a/model-00085-of-00097.safetensors b/model-00085-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b5a6625ad442ce082a7429b5bacdd5321ccdebc --- /dev/null +++ b/model-00085-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e8924b35084abade8a4da942ce3f7a2923fe0746fcf62e42a2034a90950913 +size 4697621560 diff --git a/model-00086-of-00097.safetensors b/model-00086-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd7db10b2c47752c788ff4c1954f021f734873f1 --- /dev/null +++ b/model-00086-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f13dab3f5f350be49228a8d62d756403ca2d10efa15bc0ecf34e86e85807f4cc +size 4580353760 diff --git a/model-00087-of-00097.safetensors b/model-00087-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab4fb0777d50cd364b4307da69543c7ea1a8a32d --- /dev/null +++ b/model-00087-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edff9c7708d9e9a6fa22238202476be11f1594a1eee1995af46effee26ad5ca3 +size 4580353768 diff --git a/model-00088-of-00097.safetensors b/model-00088-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f89514a0083b7a88c85ad335ec07a6a4d50730e --- /dev/null +++ b/model-00088-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45bae66b345d0ef70f7c7c4ba1575aecaf413f6140130b00234da0b7bc9f04b0 +size 4697621560 diff --git a/model-00089-of-00097.safetensors b/model-00089-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..773406411fcd9457a46b56a7107e8ef92e676c04 --- /dev/null +++ b/model-00089-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c66ca534bbc03ad71a9c0ca50f110eac38512e6d17d2ac9a220a181dd9db245 +size 4580353768 diff --git a/model-00090-of-00097.safetensors b/model-00090-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccf0194abd9ec3fb9df239575ad7459db413c6cf --- /dev/null +++ b/model-00090-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8b68795b0b0ee334c1d61b104361d0b03a8c5f80ee993fbe7d107f59c03026 +size 4961920640 diff --git a/model-00091-of-00097.safetensors b/model-00091-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..834109d7672b5fcdbf475b3283f60a7b7b1502f4 --- /dev/null +++ b/model-00091-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4d95280e74ce548aae7296d551c870742f885d094701bb3b7932b26bdfd5ac +size 4903257384 diff --git a/model-00092-of-00097.safetensors b/model-00092-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b76a6f8ddca042b7735ed147e1a496bbe923a8f --- /dev/null +++ b/model-00092-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe6795e317f41486451a681f372a9c7ec9017262c5fc4c714e7de183cf902dd +size 4580353768 diff --git a/model-00093-of-00097.safetensors b/model-00093-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffe0f192abb85486d95711fbbdf882adcaab6820 --- /dev/null +++ b/model-00093-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8a64b27ac581cc22ff257fc08b5dbabbf88de93139ca8443dcf173826dac2a +size 4697621560 diff --git a/model-00094-of-00097.safetensors b/model-00094-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d678f5d524f6fc0f11267e8a78b6ba54de193ee1 --- /dev/null +++ b/model-00094-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5da04130c86e9c8e8e679c7153a1080a2c19bfdd52a961876adb54d8628b4855 +size 4580353760 diff --git a/model-00095-of-00097.safetensors b/model-00095-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e3b4a95f690649c396ea148ad2728f9397285bc --- /dev/null +++ b/model-00095-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e09984efcb9b87fd521a6123bdfa7ac570e73c55e0ad79336e33fb1edbd022c +size 4580353768 diff --git a/model-00096-of-00097.safetensors b/model-00096-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f17102f306b392231e2fa335b8980deb637e663 --- /dev/null +++ b/model-00096-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ca0cf7a4668cce3cb02d37b4234455e0fabf45615fdce68c407c8614d2452b +size 4697621560 diff --git a/model-00097-of-00097.safetensors b/model-00097-of-00097.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de03cbe77f8dd75bb817166a49c96ddd9d91b6ef --- /dev/null +++ b/model-00097-of-00097.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:facbb222416311f99bbd48039344bff5a8d3140cf75e755b85e2d18c35ebc6af +size 3596702536 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..e41707cf7a9b6bc4c45acb6739fc8141be7df3e8 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1150 @@ +{ + "metadata": { + "total_size": 454651932672 + }, + "weight_map": { + "lm_head.weight": "model-00097-of-00097.safetensors", + "model.embed_tokens.weight": "model-00001-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00097.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00097.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00097.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00097.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00097.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00097.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00097.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00097.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00097.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00097.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00003-of-00097.safetensors", + "model.layers.1.input_layernorm.weight": "model-00004-of-00097.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00097.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00097.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00097.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00097.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00097.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00097.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00017-of-00097.safetensors", + "model.layers.10.input_layernorm.weight": "model-00018-of-00097.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00018-of-00097.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00017-of-00097.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00017-of-00097.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00017-of-00097.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00017-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00097.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00097.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00019-of-00097.safetensors", + "model.layers.11.input_layernorm.weight": "model-00020-of-00097.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00020-of-00097.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00018-of-00097.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00019-of-00097.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00018-of-00097.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00018-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00097.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00097.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00020-of-00097.safetensors", + "model.layers.12.input_layernorm.weight": "model-00022-of-00097.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00022-of-00097.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00020-of-00097.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00020-of-00097.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00020-of-00097.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00020-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00097.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00097.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00022-of-00097.safetensors", + "model.layers.13.input_layernorm.weight": "model-00023-of-00097.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00023-of-00097.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00022-of-00097.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00022-of-00097.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00022-of-00097.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00022-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00097.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00097.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00023-of-00097.safetensors", + "model.layers.14.input_layernorm.weight": "model-00025-of-00097.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00025-of-00097.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00023-of-00097.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00023-of-00097.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00023-of-00097.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00023-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00097.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00097.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00025-of-00097.safetensors", + "model.layers.15.input_layernorm.weight": "model-00026-of-00097.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00026-of-00097.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00025-of-00097.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00025-of-00097.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00025-of-00097.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00025-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00097.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00097.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00027-of-00097.safetensors", + "model.layers.16.input_layernorm.weight": "model-00028-of-00097.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00028-of-00097.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00026-of-00097.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00027-of-00097.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00026-of-00097.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00026-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00097.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00097.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00028-of-00097.safetensors", + "model.layers.17.input_layernorm.weight": "model-00030-of-00097.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00030-of-00097.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00028-of-00097.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00028-of-00097.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00028-of-00097.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00028-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00031-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00097.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00097.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00030-of-00097.safetensors", + "model.layers.18.input_layernorm.weight": "model-00031-of-00097.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00031-of-00097.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00030-of-00097.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00030-of-00097.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00030-of-00097.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00030-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00031-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00097.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00097.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00031-of-00097.safetensors", + "model.layers.19.input_layernorm.weight": "model-00033-of-00097.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00033-of-00097.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00031-of-00097.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00031-of-00097.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00031-of-00097.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00031-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00097.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00097.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00097.safetensors", + "model.layers.2.input_layernorm.weight": "model-00006-of-00097.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00006-of-00097.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00097.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00097.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00097.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00097.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00097.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00033-of-00097.safetensors", + "model.layers.20.input_layernorm.weight": "model-00034-of-00097.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00034-of-00097.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00033-of-00097.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00033-of-00097.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00033-of-00097.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00033-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00097.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00097.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00035-of-00097.safetensors", + "model.layers.21.input_layernorm.weight": "model-00036-of-00097.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00036-of-00097.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00034-of-00097.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00035-of-00097.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00034-of-00097.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00034-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00097.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00097.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00036-of-00097.safetensors", + "model.layers.22.input_layernorm.weight": "model-00038-of-00097.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00038-of-00097.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00036-of-00097.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00036-of-00097.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00036-of-00097.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00036-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00097.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00097.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00038-of-00097.safetensors", + "model.layers.23.input_layernorm.weight": "model-00039-of-00097.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00039-of-00097.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00038-of-00097.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00038-of-00097.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00038-of-00097.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00038-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00097.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00097.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00039-of-00097.safetensors", + "model.layers.24.input_layernorm.weight": "model-00041-of-00097.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00041-of-00097.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00039-of-00097.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00039-of-00097.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00039-of-00097.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00039-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00097.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00097.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00041-of-00097.safetensors", + "model.layers.25.input_layernorm.weight": "model-00042-of-00097.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00042-of-00097.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00041-of-00097.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00041-of-00097.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00041-of-00097.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00041-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00097.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00097.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00043-of-00097.safetensors", + "model.layers.26.input_layernorm.weight": "model-00044-of-00097.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00044-of-00097.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00042-of-00097.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00043-of-00097.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00042-of-00097.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00042-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00097.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00046-of-00097.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00044-of-00097.safetensors", + "model.layers.27.input_layernorm.weight": "model-00046-of-00097.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00046-of-00097.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00044-of-00097.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00044-of-00097.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00044-of-00097.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00044-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00046-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00046-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00097.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00097.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00046-of-00097.safetensors", + "model.layers.28.input_layernorm.weight": "model-00047-of-00097.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00047-of-00097.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00046-of-00097.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00046-of-00097.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00046-of-00097.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00046-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00048-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00048-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00048-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00049-of-00097.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00049-of-00097.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00047-of-00097.safetensors", + "model.layers.29.input_layernorm.weight": "model-00049-of-00097.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00049-of-00097.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00047-of-00097.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00047-of-00097.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00047-of-00097.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00047-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00097.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00097.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00006-of-00097.safetensors", + "model.layers.3.input_layernorm.weight": "model-00007-of-00097.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00007-of-00097.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00006-of-00097.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00006-of-00097.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00006-of-00097.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00006-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00097.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00097.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00049-of-00097.safetensors", + "model.layers.30.input_layernorm.weight": "model-00050-of-00097.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00050-of-00097.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00049-of-00097.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00049-of-00097.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00049-of-00097.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00049-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00097.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00097.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00051-of-00097.safetensors", + "model.layers.31.input_layernorm.weight": "model-00052-of-00097.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00052-of-00097.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00050-of-00097.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00051-of-00097.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00050-of-00097.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00050-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00052-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00052-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00053-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00053-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00053-of-00097.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00097.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00052-of-00097.safetensors", + "model.layers.32.input_layernorm.weight": "model-00054-of-00097.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00054-of-00097.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00052-of-00097.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00052-of-00097.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00052-of-00097.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00052-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00055-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00055-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00055-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00097.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00097.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00054-of-00097.safetensors", + "model.layers.33.input_layernorm.weight": "model-00055-of-00097.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00055-of-00097.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00054-of-00097.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00054-of-00097.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00054-of-00097.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00054-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00097.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00097.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00055-of-00097.safetensors", + "model.layers.34.input_layernorm.weight": "model-00057-of-00097.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00057-of-00097.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00055-of-00097.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00055-of-00097.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00055-of-00097.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00055-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00058-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00058-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00058-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00058-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00058-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00058-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00097.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00097.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00057-of-00097.safetensors", + "model.layers.35.input_layernorm.weight": "model-00058-of-00097.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00058-of-00097.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00057-of-00097.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00057-of-00097.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00057-of-00097.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00057-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00059-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00059-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00059-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00060-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00060-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00060-of-00097.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00060-of-00097.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00059-of-00097.safetensors", + "model.layers.36.input_layernorm.weight": "model-00060-of-00097.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00060-of-00097.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00058-of-00097.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00059-of-00097.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00058-of-00097.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00058-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00060-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00060-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00060-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00061-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00061-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00061-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00061-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00061-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00061-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00061-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00061-of-00097.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00062-of-00097.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00060-of-00097.safetensors", + "model.layers.37.input_layernorm.weight": "model-00062-of-00097.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00062-of-00097.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00060-of-00097.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00060-of-00097.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00060-of-00097.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00060-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00062-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00062-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00062-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00062-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00062-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00062-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00063-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00063-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00063-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00063-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00063-of-00097.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00063-of-00097.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00062-of-00097.safetensors", + "model.layers.38.input_layernorm.weight": "model-00063-of-00097.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00063-of-00097.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00062-of-00097.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00062-of-00097.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00062-of-00097.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00062-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00063-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00064-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00064-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00064-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00064-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00064-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00064-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00064-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00064-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00065-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00065-of-00097.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00065-of-00097.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00063-of-00097.safetensors", + "model.layers.39.input_layernorm.weight": "model-00065-of-00097.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00065-of-00097.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00063-of-00097.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00063-of-00097.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00063-of-00097.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00063-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00097.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00097.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00007-of-00097.safetensors", + "model.layers.4.input_layernorm.weight": "model-00009-of-00097.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00009-of-00097.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00007-of-00097.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00007-of-00097.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00007-of-00097.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00007-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00065-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00065-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00065-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00065-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00066-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00066-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00066-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00066-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00066-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00066-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00066-of-00097.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00066-of-00097.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00065-of-00097.safetensors", + "model.layers.40.input_layernorm.weight": "model-00066-of-00097.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00066-of-00097.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00065-of-00097.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00065-of-00097.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00065-of-00097.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00065-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00067-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00067-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00067-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00067-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00067-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00067-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00067-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00067-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00068-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00068-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00068-of-00097.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00068-of-00097.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00067-of-00097.safetensors", + "model.layers.41.input_layernorm.weight": "model-00068-of-00097.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00068-of-00097.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00066-of-00097.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00067-of-00097.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00066-of-00097.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00066-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00068-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00069-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00069-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00069-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00069-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00069-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00069-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00069-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00069-of-00097.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00070-of-00097.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00068-of-00097.safetensors", + "model.layers.42.input_layernorm.weight": "model-00070-of-00097.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00070-of-00097.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00068-of-00097.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00068-of-00097.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00068-of-00097.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00068-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00070-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00070-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00070-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00070-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00070-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00070-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00071-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00071-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00071-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00071-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00071-of-00097.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00071-of-00097.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00070-of-00097.safetensors", + "model.layers.43.input_layernorm.weight": "model-00071-of-00097.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00071-of-00097.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00070-of-00097.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00070-of-00097.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00070-of-00097.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00070-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00071-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00072-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00072-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00072-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00072-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00072-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00072-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00072-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00072-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00073-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00073-of-00097.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00073-of-00097.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00071-of-00097.safetensors", + "model.layers.44.input_layernorm.weight": "model-00073-of-00097.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00073-of-00097.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00071-of-00097.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00071-of-00097.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00071-of-00097.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00071-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00073-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00073-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00073-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00073-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00074-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00074-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00074-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00074-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00074-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00074-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00074-of-00097.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00074-of-00097.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00073-of-00097.safetensors", + "model.layers.45.input_layernorm.weight": "model-00074-of-00097.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00074-of-00097.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00073-of-00097.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00073-of-00097.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00073-of-00097.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00073-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00075-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00075-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00075-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00075-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00075-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00075-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00075-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00075-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00076-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00076-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00076-of-00097.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00076-of-00097.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00075-of-00097.safetensors", + "model.layers.46.input_layernorm.weight": "model-00076-of-00097.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00076-of-00097.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00074-of-00097.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00075-of-00097.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00074-of-00097.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00074-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00076-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00076-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00076-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00077-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00077-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00077-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00077-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00077-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00077-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00077-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00077-of-00097.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00078-of-00097.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00076-of-00097.safetensors", + "model.layers.47.input_layernorm.weight": "model-00078-of-00097.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00078-of-00097.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00076-of-00097.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00076-of-00097.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00076-of-00097.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00076-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00078-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00078-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00078-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00078-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00078-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00078-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00079-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00079-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00079-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00079-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00079-of-00097.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00079-of-00097.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00078-of-00097.safetensors", + "model.layers.48.input_layernorm.weight": "model-00079-of-00097.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00079-of-00097.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00078-of-00097.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00078-of-00097.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00078-of-00097.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00078-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00079-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00080-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00080-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00080-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00080-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00080-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00081-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00081-of-00097.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00081-of-00097.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00079-of-00097.safetensors", + "model.layers.49.input_layernorm.weight": "model-00081-of-00097.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00081-of-00097.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00079-of-00097.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00079-of-00097.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00079-of-00097.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00079-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00097.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00097.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00009-of-00097.safetensors", + "model.layers.5.input_layernorm.weight": "model-00010-of-00097.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00010-of-00097.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00009-of-00097.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00009-of-00097.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00009-of-00097.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00009-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00081-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00081-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00081-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00081-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00082-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00082-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00082-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00082-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00082-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00082-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00082-of-00097.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00082-of-00097.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00081-of-00097.safetensors", + "model.layers.50.input_layernorm.weight": "model-00082-of-00097.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00082-of-00097.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00081-of-00097.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00081-of-00097.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00081-of-00097.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00081-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00083-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00083-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00083-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00083-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00083-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00083-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00083-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00083-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00084-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00084-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00084-of-00097.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00084-of-00097.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00083-of-00097.safetensors", + "model.layers.51.input_layernorm.weight": "model-00084-of-00097.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00084-of-00097.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00082-of-00097.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00083-of-00097.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00082-of-00097.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00082-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00084-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00084-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00084-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00085-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00085-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00085-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00085-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00085-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00085-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00085-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00085-of-00097.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00086-of-00097.safetensors", + "model.layers.52.block_sparse_moe.gate.weight": "model-00084-of-00097.safetensors", + "model.layers.52.input_layernorm.weight": "model-00086-of-00097.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00086-of-00097.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00084-of-00097.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00084-of-00097.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00084-of-00097.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00084-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00086-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00086-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00086-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00086-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00086-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00086-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00087-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00087-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00087-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00087-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00087-of-00097.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00087-of-00097.safetensors", + "model.layers.53.block_sparse_moe.gate.weight": "model-00086-of-00097.safetensors", + "model.layers.53.input_layernorm.weight": "model-00087-of-00097.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00087-of-00097.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00086-of-00097.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00086-of-00097.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00086-of-00097.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00086-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00087-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00088-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00088-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00088-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00088-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00088-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00088-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00088-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00088-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00089-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00089-of-00097.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00089-of-00097.safetensors", + "model.layers.54.block_sparse_moe.gate.weight": "model-00087-of-00097.safetensors", + "model.layers.54.input_layernorm.weight": "model-00089-of-00097.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00089-of-00097.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00087-of-00097.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00087-of-00097.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00087-of-00097.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00087-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00089-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00089-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00089-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00089-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00090-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00090-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00090-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00090-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00090-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00090-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00090-of-00097.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00090-of-00097.safetensors", + "model.layers.55.block_sparse_moe.gate.weight": "model-00089-of-00097.safetensors", + "model.layers.55.input_layernorm.weight": "model-00090-of-00097.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00090-of-00097.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00089-of-00097.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00089-of-00097.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00089-of-00097.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00089-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w1.weight": "model-00091-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w2.weight": "model-00091-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w3.weight": "model-00091-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w1.weight": "model-00091-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w2.weight": "model-00091-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w3.weight": "model-00091-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.2.w1.weight": "model-00091-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.2.w2.weight": "model-00091-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.2.w3.weight": "model-00092-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.3.w1.weight": "model-00092-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.3.w2.weight": "model-00092-of-00097.safetensors", + "model.layers.56.block_sparse_moe.experts.3.w3.weight": "model-00092-of-00097.safetensors", + "model.layers.56.block_sparse_moe.gate.weight": "model-00091-of-00097.safetensors", + "model.layers.56.input_layernorm.weight": "model-00092-of-00097.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00092-of-00097.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00090-of-00097.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00091-of-00097.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00090-of-00097.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00090-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w2.weight": "model-00092-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w3.weight": "model-00092-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w1.weight": "model-00093-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w2.weight": "model-00093-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w3.weight": "model-00093-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.2.w1.weight": "model-00093-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.2.w2.weight": "model-00093-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.2.w3.weight": "model-00093-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.3.w1.weight": "model-00093-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.3.w2.weight": "model-00093-of-00097.safetensors", + "model.layers.57.block_sparse_moe.experts.3.w3.weight": "model-00094-of-00097.safetensors", + "model.layers.57.block_sparse_moe.gate.weight": "model-00092-of-00097.safetensors", + "model.layers.57.input_layernorm.weight": "model-00094-of-00097.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00094-of-00097.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00092-of-00097.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00092-of-00097.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00092-of-00097.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00092-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w1.weight": "model-00094-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w2.weight": "model-00094-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w3.weight": "model-00094-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w1.weight": "model-00094-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w2.weight": "model-00094-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w3.weight": "model-00094-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.2.w3.weight": "model-00095-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.3.w1.weight": "model-00095-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.3.w2.weight": "model-00095-of-00097.safetensors", + "model.layers.58.block_sparse_moe.experts.3.w3.weight": "model-00095-of-00097.safetensors", + "model.layers.58.block_sparse_moe.gate.weight": "model-00094-of-00097.safetensors", + "model.layers.58.input_layernorm.weight": "model-00095-of-00097.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00095-of-00097.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00094-of-00097.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00094-of-00097.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00094-of-00097.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00094-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w2.weight": "model-00096-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w3.weight": "model-00096-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w1.weight": "model-00096-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w2.weight": "model-00096-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w3.weight": "model-00096-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.2.w1.weight": "model-00096-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.2.w2.weight": "model-00096-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.2.w3.weight": "model-00096-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.3.w1.weight": "model-00097-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.3.w2.weight": "model-00097-of-00097.safetensors", + "model.layers.59.block_sparse_moe.experts.3.w3.weight": "model-00097-of-00097.safetensors", + "model.layers.59.block_sparse_moe.gate.weight": "model-00095-of-00097.safetensors", + "model.layers.59.input_layernorm.weight": "model-00097-of-00097.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00097-of-00097.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00095-of-00097.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00095-of-00097.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00095-of-00097.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00095-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00097.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00097.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00011-of-00097.safetensors", + "model.layers.6.input_layernorm.weight": "model-00012-of-00097.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00012-of-00097.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00010-of-00097.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00011-of-00097.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00010-of-00097.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00010-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00097.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00097.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00012-of-00097.safetensors", + "model.layers.7.input_layernorm.weight": "model-00014-of-00097.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00014-of-00097.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00012-of-00097.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00012-of-00097.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00012-of-00097.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00012-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00015-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00097.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00097.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00014-of-00097.safetensors", + "model.layers.8.input_layernorm.weight": "model-00015-of-00097.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00015-of-00097.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00014-of-00097.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00014-of-00097.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00014-of-00097.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00014-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00097.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00097.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00015-of-00097.safetensors", + "model.layers.9.input_layernorm.weight": "model-00017-of-00097.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00017-of-00097.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00015-of-00097.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00015-of-00097.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00015-of-00097.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00015-of-00097.safetensors", + "model.norm.weight": "model-00097-of-00097.safetensors" + } +}