Upload folder using huggingface_hub
- config.json +33 -1
- model-00001-of-00010.safetensors +2 -2
- model-00002-of-00010.safetensors +2 -2
- model-00003-of-00010.safetensors +2 -2
- model-00004-of-00010.safetensors +2 -2
- model-00005-of-00010.safetensors +2 -2
- model-00006-of-00010.safetensors +2 -2
- model-00007-of-00010.safetensors +2 -2
- model-00008-of-00010.safetensors +2 -2
- model-00009-of-00010.safetensors +2 -2
- model-00010-of-00010.safetensors +2 -2
- model.safetensors.index.json +33 -97
- recipe.yaml +1 -1
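The commit message above is the default one written by `huggingface_hub` when a whole checkpoint directory is pushed in a single call. A minimal sketch of that kind of upload, with a hypothetical local path and repo id (neither is taken from this commit):

```python
# Sketch: push a local checkpoint directory to the Hub in one commit.
# folder_path and repo_id are placeholders, not values from this repository.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login`
api.upload_folder(
    folder_path="./quantized-checkpoint",
    repo_id="your-org/your-model",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```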
config.json
CHANGED
@@ -27,8 +27,40 @@
 }
 },
 "format": "pack-quantized",
-"global_compression_ratio": 1.
+"global_compression_ratio": 1.536450042423991,
 "ignore": [
+"model.layers.0.block_sparse_moe.gate",
+"model.layers.1.block_sparse_moe.gate",
+"model.layers.2.block_sparse_moe.gate",
+"model.layers.3.block_sparse_moe.gate",
+"model.layers.4.block_sparse_moe.gate",
+"model.layers.5.block_sparse_moe.gate",
+"model.layers.6.block_sparse_moe.gate",
+"model.layers.7.block_sparse_moe.gate",
+"model.layers.8.block_sparse_moe.gate",
+"model.layers.9.block_sparse_moe.gate",
+"model.layers.10.block_sparse_moe.gate",
+"model.layers.11.block_sparse_moe.gate",
+"model.layers.12.block_sparse_moe.gate",
+"model.layers.13.block_sparse_moe.gate",
+"model.layers.14.block_sparse_moe.gate",
+"model.layers.15.block_sparse_moe.gate",
+"model.layers.16.block_sparse_moe.gate",
+"model.layers.17.block_sparse_moe.gate",
+"model.layers.18.block_sparse_moe.gate",
+"model.layers.19.block_sparse_moe.gate",
+"model.layers.20.block_sparse_moe.gate",
+"model.layers.21.block_sparse_moe.gate",
+"model.layers.22.block_sparse_moe.gate",
+"model.layers.23.block_sparse_moe.gate",
+"model.layers.24.block_sparse_moe.gate",
+"model.layers.25.block_sparse_moe.gate",
+"model.layers.26.block_sparse_moe.gate",
+"model.layers.27.block_sparse_moe.gate",
+"model.layers.28.block_sparse_moe.gate",
+"model.layers.29.block_sparse_moe.gate",
+"model.layers.30.block_sparse_moe.gate",
+"model.layers.31.block_sparse_moe.gate",
 "lm_head"
 ],
 "kv_cache_scheme": null,
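In a compressed-tensors style config, the `ignore` list names modules the quantizer left in full precision; this change adds every MoE router (`block_sparse_moe.gate`) for layers 0-31 alongside `lm_head` and records the new `global_compression_ratio`. A quick sketch for inspecting the updated file (assumes the block sits under the usual `quantization_config` key and that `config.json` has been downloaded locally):

```python
# Sketch: list which modules the quantization config skips.
import json

with open("config.json") as f:
    config = json.load(f)

qcfg = config["quantization_config"]        # assumed key name
print(qcfg["format"])                       # "pack-quantized"
print(qcfg["global_compression_ratio"])     # 1.536450042423991
for name in qcfg["ignore"]:                 # 32 gate routers + "lm_head"
    print("kept in full precision:", name)
```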
model-00001-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1ffaceffe2687ac26a27dbc1fc269baf05bed8a3d32da8d61d72d466305a4382
+size 4953482192
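Each shard is stored through Git LFS, so the file tracked in the repository is just the pointer shown above (spec version, `oid sha256`, `size`). A sketch of verifying a downloaded shard against its pointer using only the standard library (the local file name is an assumption):

```python
# Sketch: check a downloaded shard against the LFS pointer above.
import hashlib
import os

path = "model-00001-of-00010.safetensors"   # assumed local copy of the shard
expected_sha = "1ffaceffe2687ac26a27dbc1fc269baf05bed8a3d32da8d61d72d466305a4382"
expected_size = 4953482192

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_sha, "sha256 mismatch"
print("shard matches its LFS pointer")
```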
model-00002-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:050456c3cc3a81b7fd4792ae90ed910e150e842f5da3fb4733785ac161ca8990
+size 4943033504
model-00003-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:38c01cf35f08460cb147091fdd8f9a449ccd716b76e5b53e9819a66f0b826964
+size 4985080736
model-00004-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:8402a9fa5f40a28edb040ffdfd54808ec1ae7cfef7dc1a8003b0a1d27f8c2418
+size 4943013320
model-00005-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a578d735eb41fef254abbc6e0303196437e40bd4106614cb8144b7686ddf425d
+size 4943033800
model-00006-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:710d41e5952d5a151ca4c98e4f2da8e2aeebc380092ccfbf38dc63f482ca40ba
+size 4985081040
model-00007-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9c9e8333ed5883deff52e0b7a9cdd591115c06479607e00b7681fd4d18f1b6fe
+size 4943013320
model-00008-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b9f7eb2a354e917258f92547f1352f74697edb6a7f0dc1332c8835472bf2d489
+size 4985081040
model-00009-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7caaabec7462058ab498b2ed42f60a0486ed3cc544c41af936620233a5394053
+size 4943033800
model-00010-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:61df7e4e02b1f966fea1ff5ff3a0763f0a95ed74286579e7b4fcf66e768f9eb0
+size 2360196848
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
 "metadata": {
-"total_size":
+"total_size": 46983698432
 },
 "weight_map": {
 "lm_head.weight": "model-00010-of-00010.safetensors",
@@ -77,9 +77,7 @@
 "model.layers.0.block_sparse_moe.experts.7.w3.weight_packed": "model-00001-of-00010.safetensors",
 "model.layers.0.block_sparse_moe.experts.7.w3.weight_scale": "model-00001-of-00010.safetensors",
 "model.layers.0.block_sparse_moe.experts.7.w3.weight_shape": "model-00001-of-00010.safetensors",
-"model.layers.0.block_sparse_moe.gate.
-"model.layers.0.block_sparse_moe.gate.weight_scale": "model-00001-of-00010.safetensors",
-"model.layers.0.block_sparse_moe.gate.weight_shape": "model-00001-of-00010.safetensors",
+"model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00010.safetensors",
 "model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
 "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
 "model.layers.0.self_attn.k_proj.weight_packed": "model-00001-of-00010.safetensors",
@@ -166,9 +164,7 @@
 "model.layers.1.block_sparse_moe.experts.7.w3.weight_packed": "model-00001-of-00010.safetensors",
 "model.layers.1.block_sparse_moe.experts.7.w3.weight_scale": "model-00001-of-00010.safetensors",
 "model.layers.1.block_sparse_moe.experts.7.w3.weight_shape": "model-00001-of-00010.safetensors",
-"model.layers.1.block_sparse_moe.gate.
-"model.layers.1.block_sparse_moe.gate.weight_scale": "model-00001-of-00010.safetensors",
-"model.layers.1.block_sparse_moe.gate.weight_shape": "model-00001-of-00010.safetensors",
+"model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00010.safetensors",
 "model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
 "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
 "model.layers.1.self_attn.k_proj.weight_packed": "model-00001-of-00010.safetensors",
@@ -255,9 +251,7 @@
 "model.layers.10.block_sparse_moe.experts.7.w3.weight_packed": "model-00004-of-00010.safetensors",
 "model.layers.10.block_sparse_moe.experts.7.w3.weight_scale": "model-00004-of-00010.safetensors",
 "model.layers.10.block_sparse_moe.experts.7.w3.weight_shape": "model-00004-of-00010.safetensors",
-"model.layers.10.block_sparse_moe.gate.
-"model.layers.10.block_sparse_moe.gate.weight_scale": "model-00003-of-00010.safetensors",
-"model.layers.10.block_sparse_moe.gate.weight_shape": "model-00003-of-00010.safetensors",
+"model.layers.10.block_sparse_moe.gate.weight": "model-00003-of-00010.safetensors",
 "model.layers.10.input_layernorm.weight": "model-00004-of-00010.safetensors",
 "model.layers.10.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
 "model.layers.10.self_attn.k_proj.weight_packed": "model-00003-of-00010.safetensors",
@@ -344,9 +338,7 @@
 "model.layers.11.block_sparse_moe.experts.7.w3.weight_packed": "model-00004-of-00010.safetensors",
 "model.layers.11.block_sparse_moe.experts.7.w3.weight_scale": "model-00004-of-00010.safetensors",
 "model.layers.11.block_sparse_moe.experts.7.w3.weight_shape": "model-00004-of-00010.safetensors",
-"model.layers.11.block_sparse_moe.gate.
-"model.layers.11.block_sparse_moe.gate.weight_scale": "model-00004-of-00010.safetensors",
-"model.layers.11.block_sparse_moe.gate.weight_shape": "model-00004-of-00010.safetensors",
+"model.layers.11.block_sparse_moe.gate.weight": "model-00004-of-00010.safetensors",
 "model.layers.11.input_layernorm.weight": "model-00004-of-00010.safetensors",
 "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
 "model.layers.11.self_attn.k_proj.weight_packed": "model-00004-of-00010.safetensors",
@@ -433,9 +425,7 @@
 "model.layers.12.block_sparse_moe.experts.7.w3.weight_packed": "model-00004-of-00010.safetensors",
 "model.layers.12.block_sparse_moe.experts.7.w3.weight_scale": "model-00004-of-00010.safetensors",
 "model.layers.12.block_sparse_moe.experts.7.w3.weight_shape": "model-00004-of-00010.safetensors",
-"model.layers.12.block_sparse_moe.gate.
-"model.layers.12.block_sparse_moe.gate.weight_scale": "model-00004-of-00010.safetensors",
-"model.layers.12.block_sparse_moe.gate.weight_shape": "model-00004-of-00010.safetensors",
+"model.layers.12.block_sparse_moe.gate.weight": "model-00004-of-00010.safetensors",
 "model.layers.12.input_layernorm.weight": "model-00004-of-00010.safetensors",
 "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
 "model.layers.12.self_attn.k_proj.weight_packed": "model-00004-of-00010.safetensors",
@@ -522,9 +512,7 @@
 "model.layers.13.block_sparse_moe.experts.7.w3.weight_packed": "model-00005-of-00010.safetensors",
 "model.layers.13.block_sparse_moe.experts.7.w3.weight_scale": "model-00005-of-00010.safetensors",
 "model.layers.13.block_sparse_moe.experts.7.w3.weight_shape": "model-00005-of-00010.safetensors",
-"model.layers.13.block_sparse_moe.gate.
-"model.layers.13.block_sparse_moe.gate.weight_scale": "model-00004-of-00010.safetensors",
-"model.layers.13.block_sparse_moe.gate.weight_shape": "model-00004-of-00010.safetensors",
+"model.layers.13.block_sparse_moe.gate.weight": "model-00004-of-00010.safetensors",
 "model.layers.13.input_layernorm.weight": "model-00005-of-00010.safetensors",
 "model.layers.13.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
 "model.layers.13.self_attn.k_proj.weight_packed": "model-00004-of-00010.safetensors",
@@ -611,9 +599,7 @@
 "model.layers.14.block_sparse_moe.experts.7.w3.weight_packed": "model-00005-of-00010.safetensors",
 "model.layers.14.block_sparse_moe.experts.7.w3.weight_scale": "model-00005-of-00010.safetensors",
 "model.layers.14.block_sparse_moe.experts.7.w3.weight_shape": "model-00005-of-00010.safetensors",
-"model.layers.14.block_sparse_moe.gate.
-"model.layers.14.block_sparse_moe.gate.weight_scale": "model-00005-of-00010.safetensors",
-"model.layers.14.block_sparse_moe.gate.weight_shape": "model-00005-of-00010.safetensors",
+"model.layers.14.block_sparse_moe.gate.weight": "model-00005-of-00010.safetensors",
 "model.layers.14.input_layernorm.weight": "model-00005-of-00010.safetensors",
 "model.layers.14.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
 "model.layers.14.self_attn.k_proj.weight_packed": "model-00005-of-00010.safetensors",
@@ -700,9 +686,7 @@
 "model.layers.15.block_sparse_moe.experts.7.w3.weight_packed": "model-00005-of-00010.safetensors",
 "model.layers.15.block_sparse_moe.experts.7.w3.weight_scale": "model-00005-of-00010.safetensors",
 "model.layers.15.block_sparse_moe.experts.7.w3.weight_shape": "model-00005-of-00010.safetensors",
-"model.layers.15.block_sparse_moe.gate.
-"model.layers.15.block_sparse_moe.gate.weight_scale": "model-00005-of-00010.safetensors",
-"model.layers.15.block_sparse_moe.gate.weight_shape": "model-00005-of-00010.safetensors",
+"model.layers.15.block_sparse_moe.gate.weight": "model-00005-of-00010.safetensors",
 "model.layers.15.input_layernorm.weight": "model-00005-of-00010.safetensors",
 "model.layers.15.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
 "model.layers.15.self_attn.k_proj.weight_packed": "model-00005-of-00010.safetensors",
@@ -789,9 +773,7 @@
 "model.layers.16.block_sparse_moe.experts.7.w3.weight_packed": "model-00006-of-00010.safetensors",
 "model.layers.16.block_sparse_moe.experts.7.w3.weight_scale": "model-00006-of-00010.safetensors",
 "model.layers.16.block_sparse_moe.experts.7.w3.weight_shape": "model-00006-of-00010.safetensors",
-"model.layers.16.block_sparse_moe.gate.
-"model.layers.16.block_sparse_moe.gate.weight_scale": "model-00005-of-00010.safetensors",
-"model.layers.16.block_sparse_moe.gate.weight_shape": "model-00005-of-00010.safetensors",
+"model.layers.16.block_sparse_moe.gate.weight": "model-00005-of-00010.safetensors",
 "model.layers.16.input_layernorm.weight": "model-00006-of-00010.safetensors",
 "model.layers.16.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
 "model.layers.16.self_attn.k_proj.weight_packed": "model-00005-of-00010.safetensors",
@@ -878,9 +860,7 @@
 "model.layers.17.block_sparse_moe.experts.7.w3.weight_packed": "model-00006-of-00010.safetensors",
 "model.layers.17.block_sparse_moe.experts.7.w3.weight_scale": "model-00006-of-00010.safetensors",
 "model.layers.17.block_sparse_moe.experts.7.w3.weight_shape": "model-00006-of-00010.safetensors",
-"model.layers.17.block_sparse_moe.gate.
-"model.layers.17.block_sparse_moe.gate.weight_scale": "model-00006-of-00010.safetensors",
-"model.layers.17.block_sparse_moe.gate.weight_shape": "model-00006-of-00010.safetensors",
+"model.layers.17.block_sparse_moe.gate.weight": "model-00006-of-00010.safetensors",
 "model.layers.17.input_layernorm.weight": "model-00006-of-00010.safetensors",
 "model.layers.17.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
 "model.layers.17.self_attn.k_proj.weight_packed": "model-00006-of-00010.safetensors",
@@ -967,9 +947,7 @@
 "model.layers.18.block_sparse_moe.experts.7.w3.weight_packed": "model-00006-of-00010.safetensors",
 "model.layers.18.block_sparse_moe.experts.7.w3.weight_scale": "model-00006-of-00010.safetensors",
 "model.layers.18.block_sparse_moe.experts.7.w3.weight_shape": "model-00006-of-00010.safetensors",
-"model.layers.18.block_sparse_moe.gate.
-"model.layers.18.block_sparse_moe.gate.weight_scale": "model-00006-of-00010.safetensors",
-"model.layers.18.block_sparse_moe.gate.weight_shape": "model-00006-of-00010.safetensors",
+"model.layers.18.block_sparse_moe.gate.weight": "model-00006-of-00010.safetensors",
 "model.layers.18.input_layernorm.weight": "model-00006-of-00010.safetensors",
 "model.layers.18.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
 "model.layers.18.self_attn.k_proj.weight_packed": "model-00006-of-00010.safetensors",
@@ -1056,9 +1034,7 @@
 "model.layers.19.block_sparse_moe.experts.7.w3.weight_packed": "model-00006-of-00010.safetensors",
 "model.layers.19.block_sparse_moe.experts.7.w3.weight_scale": "model-00006-of-00010.safetensors",
 "model.layers.19.block_sparse_moe.experts.7.w3.weight_shape": "model-00006-of-00010.safetensors",
-"model.layers.19.block_sparse_moe.gate.
-"model.layers.19.block_sparse_moe.gate.weight_scale": "model-00006-of-00010.safetensors",
-"model.layers.19.block_sparse_moe.gate.weight_shape": "model-00006-of-00010.safetensors",
+"model.layers.19.block_sparse_moe.gate.weight": "model-00006-of-00010.safetensors",
 "model.layers.19.input_layernorm.weight": "model-00006-of-00010.safetensors",
 "model.layers.19.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
 "model.layers.19.self_attn.k_proj.weight_packed": "model-00006-of-00010.safetensors",
@@ -1145,9 +1121,7 @@
 "model.layers.2.block_sparse_moe.experts.7.w3.weight_packed": "model-00001-of-00010.safetensors",
 "model.layers.2.block_sparse_moe.experts.7.w3.weight_scale": "model-00001-of-00010.safetensors",
 "model.layers.2.block_sparse_moe.experts.7.w3.weight_shape": "model-00001-of-00010.safetensors",
-"model.layers.2.block_sparse_moe.gate.
-"model.layers.2.block_sparse_moe.gate.weight_scale": "model-00001-of-00010.safetensors",
-"model.layers.2.block_sparse_moe.gate.weight_shape": "model-00001-of-00010.safetensors",
+"model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00010.safetensors",
 "model.layers.2.input_layernorm.weight": "model-00001-of-00010.safetensors",
 "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
 "model.layers.2.self_attn.k_proj.weight_packed": "model-00001-of-00010.safetensors",
@@ -1234,9 +1208,7 @@
 "model.layers.20.block_sparse_moe.experts.7.w3.weight_packed": "model-00007-of-00010.safetensors",
 "model.layers.20.block_sparse_moe.experts.7.w3.weight_scale": "model-00007-of-00010.safetensors",
 "model.layers.20.block_sparse_moe.experts.7.w3.weight_shape": "model-00007-of-00010.safetensors",
-"model.layers.20.block_sparse_moe.gate.
-"model.layers.20.block_sparse_moe.gate.weight_scale": "model-00006-of-00010.safetensors",
-"model.layers.20.block_sparse_moe.gate.weight_shape": "model-00006-of-00010.safetensors",
+"model.layers.20.block_sparse_moe.gate.weight": "model-00006-of-00010.safetensors",
 "model.layers.20.input_layernorm.weight": "model-00007-of-00010.safetensors",
 "model.layers.20.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
 "model.layers.20.self_attn.k_proj.weight_packed": "model-00006-of-00010.safetensors",
@@ -1323,9 +1295,7 @@
 "model.layers.21.block_sparse_moe.experts.7.w3.weight_packed": "model-00007-of-00010.safetensors",
 "model.layers.21.block_sparse_moe.experts.7.w3.weight_scale": "model-00007-of-00010.safetensors",
 "model.layers.21.block_sparse_moe.experts.7.w3.weight_shape": "model-00007-of-00010.safetensors",
-"model.layers.21.block_sparse_moe.gate.
-"model.layers.21.block_sparse_moe.gate.weight_scale": "model-00007-of-00010.safetensors",
-"model.layers.21.block_sparse_moe.gate.weight_shape": "model-00007-of-00010.safetensors",
+"model.layers.21.block_sparse_moe.gate.weight": "model-00007-of-00010.safetensors",
 "model.layers.21.input_layernorm.weight": "model-00007-of-00010.safetensors",
 "model.layers.21.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
 "model.layers.21.self_attn.k_proj.weight_packed": "model-00007-of-00010.safetensors",
@@ -1412,9 +1382,7 @@
 "model.layers.22.block_sparse_moe.experts.7.w3.weight_packed": "model-00007-of-00010.safetensors",
 "model.layers.22.block_sparse_moe.experts.7.w3.weight_scale": "model-00007-of-00010.safetensors",
 "model.layers.22.block_sparse_moe.experts.7.w3.weight_shape": "model-00007-of-00010.safetensors",
-"model.layers.22.block_sparse_moe.gate.
-"model.layers.22.block_sparse_moe.gate.weight_scale": "model-00007-of-00010.safetensors",
-"model.layers.22.block_sparse_moe.gate.weight_shape": "model-00007-of-00010.safetensors",
+"model.layers.22.block_sparse_moe.gate.weight": "model-00007-of-00010.safetensors",
 "model.layers.22.input_layernorm.weight": "model-00007-of-00010.safetensors",
 "model.layers.22.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
 "model.layers.22.self_attn.k_proj.weight_packed": "model-00007-of-00010.safetensors",
@@ -1501,9 +1469,7 @@
 "model.layers.23.block_sparse_moe.experts.7.w3.weight_packed": "model-00008-of-00010.safetensors",
 "model.layers.23.block_sparse_moe.experts.7.w3.weight_scale": "model-00008-of-00010.safetensors",
 "model.layers.23.block_sparse_moe.experts.7.w3.weight_shape": "model-00008-of-00010.safetensors",
-"model.layers.23.block_sparse_moe.gate.
-"model.layers.23.block_sparse_moe.gate.weight_scale": "model-00007-of-00010.safetensors",
-"model.layers.23.block_sparse_moe.gate.weight_shape": "model-00007-of-00010.safetensors",
+"model.layers.23.block_sparse_moe.gate.weight": "model-00007-of-00010.safetensors",
 "model.layers.23.input_layernorm.weight": "model-00008-of-00010.safetensors",
 "model.layers.23.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
 "model.layers.23.self_attn.k_proj.weight_packed": "model-00007-of-00010.safetensors",
@@ -1590,9 +1556,7 @@
 "model.layers.24.block_sparse_moe.experts.7.w3.weight_packed": "model-00008-of-00010.safetensors",
 "model.layers.24.block_sparse_moe.experts.7.w3.weight_scale": "model-00008-of-00010.safetensors",
 "model.layers.24.block_sparse_moe.experts.7.w3.weight_shape": "model-00008-of-00010.safetensors",
-"model.layers.24.block_sparse_moe.gate.
-"model.layers.24.block_sparse_moe.gate.weight_scale": "model-00008-of-00010.safetensors",
-"model.layers.24.block_sparse_moe.gate.weight_shape": "model-00008-of-00010.safetensors",
+"model.layers.24.block_sparse_moe.gate.weight": "model-00008-of-00010.safetensors",
 "model.layers.24.input_layernorm.weight": "model-00008-of-00010.safetensors",
 "model.layers.24.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
 "model.layers.24.self_attn.k_proj.weight_packed": "model-00008-of-00010.safetensors",
@@ -1679,9 +1643,7 @@
 "model.layers.25.block_sparse_moe.experts.7.w3.weight_packed": "model-00008-of-00010.safetensors",
 "model.layers.25.block_sparse_moe.experts.7.w3.weight_scale": "model-00008-of-00010.safetensors",
 "model.layers.25.block_sparse_moe.experts.7.w3.weight_shape": "model-00008-of-00010.safetensors",
-"model.layers.25.block_sparse_moe.gate.
-"model.layers.25.block_sparse_moe.gate.weight_scale": "model-00008-of-00010.safetensors",
-"model.layers.25.block_sparse_moe.gate.weight_shape": "model-00008-of-00010.safetensors",
+"model.layers.25.block_sparse_moe.gate.weight": "model-00008-of-00010.safetensors",
 "model.layers.25.input_layernorm.weight": "model-00008-of-00010.safetensors",
 "model.layers.25.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
 "model.layers.25.self_attn.k_proj.weight_packed": "model-00008-of-00010.safetensors",
@@ -1768,9 +1730,7 @@
 "model.layers.26.block_sparse_moe.experts.7.w3.weight_packed": "model-00008-of-00010.safetensors",
 "model.layers.26.block_sparse_moe.experts.7.w3.weight_scale": "model-00008-of-00010.safetensors",
 "model.layers.26.block_sparse_moe.experts.7.w3.weight_shape": "model-00008-of-00010.safetensors",
-"model.layers.26.block_sparse_moe.gate.
-"model.layers.26.block_sparse_moe.gate.weight_scale": "model-00008-of-00010.safetensors",
-"model.layers.26.block_sparse_moe.gate.weight_shape": "model-00008-of-00010.safetensors",
+"model.layers.26.block_sparse_moe.gate.weight": "model-00008-of-00010.safetensors",
 "model.layers.26.input_layernorm.weight": "model-00008-of-00010.safetensors",
 "model.layers.26.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
 "model.layers.26.self_attn.k_proj.weight_packed": "model-00008-of-00010.safetensors",
@@ -1857,9 +1817,7 @@
 "model.layers.27.block_sparse_moe.experts.7.w3.weight_packed": "model-00009-of-00010.safetensors",
 "model.layers.27.block_sparse_moe.experts.7.w3.weight_scale": "model-00009-of-00010.safetensors",
 "model.layers.27.block_sparse_moe.experts.7.w3.weight_shape": "model-00009-of-00010.safetensors",
-"model.layers.27.block_sparse_moe.gate.
-"model.layers.27.block_sparse_moe.gate.weight_scale": "model-00008-of-00010.safetensors",
-"model.layers.27.block_sparse_moe.gate.weight_shape": "model-00008-of-00010.safetensors",
+"model.layers.27.block_sparse_moe.gate.weight": "model-00008-of-00010.safetensors",
 "model.layers.27.input_layernorm.weight": "model-00009-of-00010.safetensors",
 "model.layers.27.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
 "model.layers.27.self_attn.k_proj.weight_packed": "model-00008-of-00010.safetensors",
@@ -1946,9 +1904,7 @@
 "model.layers.28.block_sparse_moe.experts.7.w3.weight_packed": "model-00009-of-00010.safetensors",
 "model.layers.28.block_sparse_moe.experts.7.w3.weight_scale": "model-00009-of-00010.safetensors",
 "model.layers.28.block_sparse_moe.experts.7.w3.weight_shape": "model-00009-of-00010.safetensors",
-"model.layers.28.block_sparse_moe.gate.
-"model.layers.28.block_sparse_moe.gate.weight_scale": "model-00009-of-00010.safetensors",
-"model.layers.28.block_sparse_moe.gate.weight_shape": "model-00009-of-00010.safetensors",
+"model.layers.28.block_sparse_moe.gate.weight": "model-00009-of-00010.safetensors",
 "model.layers.28.input_layernorm.weight": "model-00009-of-00010.safetensors",
 "model.layers.28.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
 "model.layers.28.self_attn.k_proj.weight_packed": "model-00009-of-00010.safetensors",
@@ -2035,9 +1991,7 @@
 "model.layers.29.block_sparse_moe.experts.7.w3.weight_packed": "model-00009-of-00010.safetensors",
 "model.layers.29.block_sparse_moe.experts.7.w3.weight_scale": "model-00009-of-00010.safetensors",
 "model.layers.29.block_sparse_moe.experts.7.w3.weight_shape": "model-00009-of-00010.safetensors",
-"model.layers.29.block_sparse_moe.gate.
-"model.layers.29.block_sparse_moe.gate.weight_scale": "model-00009-of-00010.safetensors",
-"model.layers.29.block_sparse_moe.gate.weight_shape": "model-00009-of-00010.safetensors",
+"model.layers.29.block_sparse_moe.gate.weight": "model-00009-of-00010.safetensors",
 "model.layers.29.input_layernorm.weight": "model-00009-of-00010.safetensors",
 "model.layers.29.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
 "model.layers.29.self_attn.k_proj.weight_packed": "model-00009-of-00010.safetensors",
@@ -2124,9 +2078,7 @@
 "model.layers.3.block_sparse_moe.experts.7.w3.weight_packed": "model-00002-of-00010.safetensors",
 "model.layers.3.block_sparse_moe.experts.7.w3.weight_scale": "model-00002-of-00010.safetensors",
 "model.layers.3.block_sparse_moe.experts.7.w3.weight_shape": "model-00002-of-00010.safetensors",
-"model.layers.3.block_sparse_moe.gate.
-"model.layers.3.block_sparse_moe.gate.weight_scale": "model-00001-of-00010.safetensors",
-"model.layers.3.block_sparse_moe.gate.weight_shape": "model-00001-of-00010.safetensors",
+"model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00010.safetensors",
 "model.layers.3.input_layernorm.weight": "model-00002-of-00010.safetensors",
 "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
 "model.layers.3.self_attn.k_proj.weight_packed": "model-00001-of-00010.safetensors",
@@ -2213,9 +2165,7 @@
 "model.layers.30.block_sparse_moe.experts.7.w3.weight_packed": "model-00010-of-00010.safetensors",
 "model.layers.30.block_sparse_moe.experts.7.w3.weight_scale": "model-00010-of-00010.safetensors",
 "model.layers.30.block_sparse_moe.experts.7.w3.weight_shape": "model-00010-of-00010.safetensors",
-"model.layers.30.block_sparse_moe.gate.
-"model.layers.30.block_sparse_moe.gate.weight_scale": "model-00009-of-00010.safetensors",
-"model.layers.30.block_sparse_moe.gate.weight_shape": "model-00009-of-00010.safetensors",
+"model.layers.30.block_sparse_moe.gate.weight": "model-00009-of-00010.safetensors",
 "model.layers.30.input_layernorm.weight": "model-00010-of-00010.safetensors",
 "model.layers.30.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
 "model.layers.30.self_attn.k_proj.weight_packed": "model-00009-of-00010.safetensors",
@@ -2302,9 +2252,7 @@
 "model.layers.31.block_sparse_moe.experts.7.w3.weight_packed": "model-00010-of-00010.safetensors",
 "model.layers.31.block_sparse_moe.experts.7.w3.weight_scale": "model-00010-of-00010.safetensors",
 "model.layers.31.block_sparse_moe.experts.7.w3.weight_shape": "model-00010-of-00010.safetensors",
-"model.layers.31.block_sparse_moe.gate.
-"model.layers.31.block_sparse_moe.gate.weight_scale": "model-00010-of-00010.safetensors",
-"model.layers.31.block_sparse_moe.gate.weight_shape": "model-00010-of-00010.safetensors",
+"model.layers.31.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors",
 "model.layers.31.input_layernorm.weight": "model-00010-of-00010.safetensors",
 "model.layers.31.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
 "model.layers.31.self_attn.k_proj.weight_packed": "model-00010-of-00010.safetensors",
@@ -2391,9 +2339,7 @@
 "model.layers.4.block_sparse_moe.experts.7.w3.weight_packed": "model-00002-of-00010.safetensors",
 "model.layers.4.block_sparse_moe.experts.7.w3.weight_scale": "model-00002-of-00010.safetensors",
 "model.layers.4.block_sparse_moe.experts.7.w3.weight_shape": "model-00002-of-00010.safetensors",
-"model.layers.4.block_sparse_moe.gate.
-"model.layers.4.block_sparse_moe.gate.weight_scale": "model-00002-of-00010.safetensors",
-"model.layers.4.block_sparse_moe.gate.weight_shape": "model-00002-of-00010.safetensors",
+"model.layers.4.block_sparse_moe.gate.weight": "model-00002-of-00010.safetensors",
 "model.layers.4.input_layernorm.weight": "model-00002-of-00010.safetensors",
 "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
 "model.layers.4.self_attn.k_proj.weight_packed": "model-00002-of-00010.safetensors",
@@ -2480,9 +2426,7 @@
 "model.layers.5.block_sparse_moe.experts.7.w3.weight_packed": "model-00002-of-00010.safetensors",
 "model.layers.5.block_sparse_moe.experts.7.w3.weight_scale": "model-00002-of-00010.safetensors",
 "model.layers.5.block_sparse_moe.experts.7.w3.weight_shape": "model-00002-of-00010.safetensors",
-"model.layers.5.block_sparse_moe.gate.
-"model.layers.5.block_sparse_moe.gate.weight_scale": "model-00002-of-00010.safetensors",
-"model.layers.5.block_sparse_moe.gate.weight_shape": "model-00002-of-00010.safetensors",
+"model.layers.5.block_sparse_moe.gate.weight": "model-00002-of-00010.safetensors",
 "model.layers.5.input_layernorm.weight": "model-00002-of-00010.safetensors",
 "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
 "model.layers.5.self_attn.k_proj.weight_packed": "model-00002-of-00010.safetensors",
@@ -2569,9 +2513,7 @@
 "model.layers.6.block_sparse_moe.experts.7.w3.weight_packed": "model-00003-of-00010.safetensors",
 "model.layers.6.block_sparse_moe.experts.7.w3.weight_scale": "model-00003-of-00010.safetensors",
 "model.layers.6.block_sparse_moe.experts.7.w3.weight_shape": "model-00003-of-00010.safetensors",
-"model.layers.6.block_sparse_moe.gate.
-"model.layers.6.block_sparse_moe.gate.weight_scale": "model-00002-of-00010.safetensors",
-"model.layers.6.block_sparse_moe.gate.weight_shape": "model-00002-of-00010.safetensors",
+"model.layers.6.block_sparse_moe.gate.weight": "model-00002-of-00010.safetensors",
 "model.layers.6.input_layernorm.weight": "model-00003-of-00010.safetensors",
 "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
 "model.layers.6.self_attn.k_proj.weight_packed": "model-00002-of-00010.safetensors",
@@ -2658,9 +2600,7 @@
 "model.layers.7.block_sparse_moe.experts.7.w3.weight_packed": "model-00003-of-00010.safetensors",
 "model.layers.7.block_sparse_moe.experts.7.w3.weight_scale": "model-00003-of-00010.safetensors",
 "model.layers.7.block_sparse_moe.experts.7.w3.weight_shape": "model-00003-of-00010.safetensors",
-"model.layers.7.block_sparse_moe.gate.
-"model.layers.7.block_sparse_moe.gate.weight_scale": "model-00003-of-00010.safetensors",
-"model.layers.7.block_sparse_moe.gate.weight_shape": "model-00003-of-00010.safetensors",
+"model.layers.7.block_sparse_moe.gate.weight": "model-00003-of-00010.safetensors",
 "model.layers.7.input_layernorm.weight": "model-00003-of-00010.safetensors",
 "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
 "model.layers.7.self_attn.k_proj.weight_packed": "model-00003-of-00010.safetensors",
@@ -2747,9 +2687,7 @@
 "model.layers.8.block_sparse_moe.experts.7.w3.weight_packed": "model-00003-of-00010.safetensors",
 "model.layers.8.block_sparse_moe.experts.7.w3.weight_scale": "model-00003-of-00010.safetensors",
 "model.layers.8.block_sparse_moe.experts.7.w3.weight_shape": "model-00003-of-00010.safetensors",
-"model.layers.8.block_sparse_moe.gate.
-"model.layers.8.block_sparse_moe.gate.weight_scale": "model-00003-of-00010.safetensors",
-"model.layers.8.block_sparse_moe.gate.weight_shape": "model-00003-of-00010.safetensors",
+"model.layers.8.block_sparse_moe.gate.weight": "model-00003-of-00010.safetensors",
 "model.layers.8.input_layernorm.weight": "model-00003-of-00010.safetensors",
 "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
 "model.layers.8.self_attn.k_proj.weight_packed": "model-00003-of-00010.safetensors",
@@ -2836,9 +2774,7 @@
 "model.layers.9.block_sparse_moe.experts.7.w3.weight_packed": "model-00003-of-00010.safetensors",
 "model.layers.9.block_sparse_moe.experts.7.w3.weight_scale": "model-00003-of-00010.safetensors",
 "model.layers.9.block_sparse_moe.experts.7.w3.weight_shape": "model-00003-of-00010.safetensors",
-"model.layers.9.block_sparse_moe.gate.
-"model.layers.9.block_sparse_moe.gate.weight_scale": "model-00003-of-00010.safetensors",
-"model.layers.9.block_sparse_moe.gate.weight_shape": "model-00003-of-00010.safetensors",
+"model.layers.9.block_sparse_moe.gate.weight": "model-00003-of-00010.safetensors",
 "model.layers.9.input_layernorm.weight": "model-00003-of-00010.safetensors",
 "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
 "model.layers.9.self_attn.k_proj.weight_packed": "model-00003-of-00010.safetensors",
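`model.safetensors.index.json` tells loaders which shard holds each tensor; after this change every layer exposes a single unquantized `block_sparse_moe.gate.weight` instead of the packed/scale/shape triple. A sketch of querying the index (assumes the file has been downloaded locally):

```python
# Sketch: resolve tensor names to shards with the sharded-checkpoint index.
import json
from collections import Counter

with open("model.safetensors.index.json") as f:
    index = json.load(f)

weight_map = index["weight_map"]
print(weight_map["model.layers.10.block_sparse_moe.gate.weight"])
# -> "model-00003-of-00010.safetensors"

# How many tensors each of the 10 shards carries.
print(Counter(weight_map.values()))

print("declared total_size:", index["metadata"]["total_size"], "bytes")  # 46983698432
```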
recipe.yaml
CHANGED
@@ -3,5 +3,5 @@ DEFAULT_stage:
 GPTQModifier:
 sequential_update: true
 targets: Linear
-ignore: [lm_head]
+ignore: [lm_head, 're:.*block_sparse_moe\.gate']
 scheme: W8A16
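The recipe change mirrors the config: besides `lm_head`, the GPTQ pass now skips the MoE routers, selected by a regex entry (the `re:` prefix marking a pattern rather than a literal module name is the usual llm-compressor convention; that reading is an assumption here). A small sketch of what the pattern covers, using plain Python `re` with illustrative module names:

```python
# Sketch: which module names the added ignore pattern would cover.
import re

pattern = re.compile(r".*block_sparse_moe\.gate")

modules = [
    "model.layers.0.block_sparse_moe.gate",          # router -> ignored
    "model.layers.31.block_sparse_moe.gate",         # router -> ignored
    "model.layers.0.block_sparse_moe.experts.7.w3",  # expert MLP -> still W8A16
    "lm_head",                                       # ignored via the literal entry
]
for name in modules:
    ignored = bool(pattern.match(name)) or name == "lm_head"
    print(f"{name}: {'ignored' if ignored else 'quantized to W8A16'}")
```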