Push folder to HuggingFace Hub
Browse files
- config.json +20 -20
- model-00001-of-00008.safetensors +1 -1
- model-00002-of-00008.safetensors +1 -1
- model-00003-of-00008.safetensors +2 -2
- model-00004-of-00008.safetensors +2 -2
- model-00005-of-00008.safetensors +1 -1
- model-00006-of-00008.safetensors +1 -1
- model-00007-of-00008.safetensors +2 -2
- model-00008-of-00008.safetensors +2 -2
- model.safetensors.index.json +4 -4
- recipe.yaml +1 -1
config.json
CHANGED
@@ -6,7 +6,24 @@
|
|
6 |
"attention_bias": false,
|
7 |
"attention_dropout": 0.0,
|
8 |
"bos_token_id": 128000,
|
9 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
"config_groups": {
|
11 |
"group_0": {
|
12 |
"input_activations": null,
|
@@ -29,7 +46,7 @@
|
|
29 |
}
|
30 |
},
|
31 |
"format": "pack-quantized",
|
32 |
-
"global_compression_ratio": 1.
|
33 |
"ignore": [
|
34 |
"lm_head"
|
35 |
],
|
@@ -43,28 +60,11 @@
|
|
43 |
"observer_kwargs": {},
|
44 |
"strategy": "tensor",
|
45 |
"symmetric": true,
|
46 |
-
"type": "
|
47 |
},
|
48 |
"quant_method": "compressed-tensors",
|
49 |
"quantization_status": "compressed"
|
50 |
},
|
51 |
-
"eos_token_id": [
|
52 |
-
128001,
|
53 |
-
128008,
|
54 |
-
128009
|
55 |
-
],
|
56 |
-
"head_dim": 128,
|
57 |
-
"hidden_act": "silu",
|
58 |
-
"hidden_size": 8192,
|
59 |
-
"initializer_range": 0.02,
|
60 |
-
"intermediate_size": 28672,
|
61 |
-
"max_position_embeddings": 131072,
|
62 |
-
"mlp_bias": false,
|
63 |
-
"model_type": "llama",
|
64 |
-
"num_attention_heads": 64,
|
65 |
-
"num_hidden_layers": 80,
|
66 |
-
"num_key_value_heads": 8,
|
67 |
-
"pretraining_tp": 1,
|
68 |
"rms_norm_eps": 1e-05,
|
69 |
"rope_scaling": {
|
70 |
"factor": 8.0,
|
|
|
6 |
"attention_bias": false,
|
7 |
"attention_dropout": 0.0,
|
8 |
"bos_token_id": 128000,
|
9 |
+
"eos_token_id": [
|
10 |
+
128001,
|
11 |
+
128008,
|
12 |
+
128009
|
13 |
+
],
|
14 |
+
"head_dim": 128,
|
15 |
+
"hidden_act": "silu",
|
16 |
+
"hidden_size": 8192,
|
17 |
+
"initializer_range": 0.02,
|
18 |
+
"intermediate_size": 28672,
|
19 |
+
"max_position_embeddings": 131072,
|
20 |
+
"mlp_bias": false,
|
21 |
+
"model_type": "llama",
|
22 |
+
"num_attention_heads": 64,
|
23 |
+
"num_hidden_layers": 80,
|
24 |
+
"num_key_value_heads": 8,
|
25 |
+
"pretraining_tp": 1,
|
26 |
+
"quantization_config": {
|
27 |
"config_groups": {
|
28 |
"group_0": {
|
29 |
"input_activations": null,
|
|
|
46 |
}
|
47 |
},
|
48 |
"format": "pack-quantized",
|
49 |
+
"global_compression_ratio": 1.90302080160764,
|
50 |
"ignore": [
|
51 |
"lm_head"
|
52 |
],
|
|
|
60 |
"observer_kwargs": {},
|
61 |
"strategy": "tensor",
|
62 |
"symmetric": true,
|
63 |
+
"type": "float"
|
64 |
},
|
65 |
"quant_method": "compressed-tensors",
|
66 |
"quantization_status": "compressed"
|
67 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
"rms_norm_eps": 1e-05,
|
69 |
"rope_scaling": {
|
70 |
"factor": 8.0,
|
model-00001-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4947660132
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f78e8be929613c66e6e58dff2b658fbc990a8e2efd2b698b0495274ab0ffe19
|
3 |
size 4947660132
|
model-00002-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4974575684
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95a6b35c2cd34f2b13ccbe9cc1f0c477ae80d7ecc0254bd5471d63d781101378
|
3 |
size 4974575684
|
model-00003-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad932fe2275531be72bbda787acd2a2656435ed7a48c4e4acfe41d971fff614a
|
3 |
+
size 4974608984
|
model-00004-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0eb324a0d91e44718d5afacf76f9e657052bb453bd071b417e2010c9f438562
|
3 |
+
size 4931323108
|
model-00005-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4974575772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5066e4b9ec3627103a9d7b4f41ac8e3a4d414fb340ee040fa6c51490196bce2c
|
3 |
size 4974575772
|
model-00006-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4974575772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdf3146096073861f9e51c43c0fee6f94c68075d4b2dde9bf1f223b04c09ae7d
|
3 |
size 4974575772
|
model-00007-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7a6e97a47ad23f785d24ec1e1d3a7f4a05a840bd68f5bcccd9d124eacca0820
|
3 |
+
size 4974608984
|
model-00008-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf47604d3b1484336f60ebb5e11ff0b63962fc79e372e6114a85a7ff30077ef
|
3 |
+
size 4748707004
|
model.safetensors.index.json
CHANGED
@@ -569,7 +569,7 @@
|
|
569 |
"model.layers.29.self_attn.k_proj.weight_packed": "model-00004-of-00008.safetensors",
|
570 |
"model.layers.29.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
|
571 |
"model.layers.29.self_attn.k_proj.weight_shape": "model-00004-of-00008.safetensors",
|
572 |
-
"model.layers.29.self_attn.k_scale": "model-
|
573 |
"model.layers.29.self_attn.o_proj.weight_packed": "model-00004-of-00008.safetensors",
|
574 |
"model.layers.29.self_attn.o_proj.weight_scale": "model-00004-of-00008.safetensors",
|
575 |
"model.layers.29.self_attn.o_proj.weight_shape": "model-00004-of-00008.safetensors",
|
@@ -579,7 +579,7 @@
|
|
579 |
"model.layers.29.self_attn.v_proj.weight_packed": "model-00004-of-00008.safetensors",
|
580 |
"model.layers.29.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
|
581 |
"model.layers.29.self_attn.v_proj.weight_shape": "model-00004-of-00008.safetensors",
|
582 |
-
"model.layers.29.self_attn.v_scale": "model-
|
583 |
"model.layers.3.input_layernorm.weight": "model-00001-of-00008.safetensors",
|
584 |
"model.layers.3.mlp.down_proj.weight_packed": "model-00001-of-00008.safetensors",
|
585 |
"model.layers.3.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
@@ -1819,7 +1819,7 @@
|
|
1819 |
"model.layers.74.self_attn.k_proj.weight_packed": "model-00008-of-00008.safetensors",
|
1820 |
"model.layers.74.self_attn.k_proj.weight_scale": "model-00008-of-00008.safetensors",
|
1821 |
"model.layers.74.self_attn.k_proj.weight_shape": "model-00008-of-00008.safetensors",
|
1822 |
-
"model.layers.74.self_attn.k_scale": "model-
|
1823 |
"model.layers.74.self_attn.o_proj.weight_packed": "model-00008-of-00008.safetensors",
|
1824 |
"model.layers.74.self_attn.o_proj.weight_scale": "model-00008-of-00008.safetensors",
|
1825 |
"model.layers.74.self_attn.o_proj.weight_shape": "model-00008-of-00008.safetensors",
|
@@ -1829,7 +1829,7 @@
|
|
1829 |
"model.layers.74.self_attn.v_proj.weight_packed": "model-00008-of-00008.safetensors",
|
1830 |
"model.layers.74.self_attn.v_proj.weight_scale": "model-00008-of-00008.safetensors",
|
1831 |
"model.layers.74.self_attn.v_proj.weight_shape": "model-00008-of-00008.safetensors",
|
1832 |
-
"model.layers.74.self_attn.v_scale": "model-
|
1833 |
"model.layers.75.input_layernorm.weight": "model-00008-of-00008.safetensors",
|
1834 |
"model.layers.75.mlp.down_proj.weight_packed": "model-00008-of-00008.safetensors",
|
1835 |
"model.layers.75.mlp.down_proj.weight_scale": "model-00008-of-00008.safetensors",
|
|
|
569 |
"model.layers.29.self_attn.k_proj.weight_packed": "model-00004-of-00008.safetensors",
|
570 |
"model.layers.29.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
|
571 |
"model.layers.29.self_attn.k_proj.weight_shape": "model-00004-of-00008.safetensors",
|
572 |
+
"model.layers.29.self_attn.k_scale": "model-00003-of-00008.safetensors",
|
573 |
"model.layers.29.self_attn.o_proj.weight_packed": "model-00004-of-00008.safetensors",
|
574 |
"model.layers.29.self_attn.o_proj.weight_scale": "model-00004-of-00008.safetensors",
|
575 |
"model.layers.29.self_attn.o_proj.weight_shape": "model-00004-of-00008.safetensors",
|
|
|
579 |
"model.layers.29.self_attn.v_proj.weight_packed": "model-00004-of-00008.safetensors",
|
580 |
"model.layers.29.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
|
581 |
"model.layers.29.self_attn.v_proj.weight_shape": "model-00004-of-00008.safetensors",
|
582 |
+
"model.layers.29.self_attn.v_scale": "model-00003-of-00008.safetensors",
|
583 |
"model.layers.3.input_layernorm.weight": "model-00001-of-00008.safetensors",
|
584 |
"model.layers.3.mlp.down_proj.weight_packed": "model-00001-of-00008.safetensors",
|
585 |
"model.layers.3.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
|
|
1819 |
"model.layers.74.self_attn.k_proj.weight_packed": "model-00008-of-00008.safetensors",
|
1820 |
"model.layers.74.self_attn.k_proj.weight_scale": "model-00008-of-00008.safetensors",
|
1821 |
"model.layers.74.self_attn.k_proj.weight_shape": "model-00008-of-00008.safetensors",
|
1822 |
+
"model.layers.74.self_attn.k_scale": "model-00007-of-00008.safetensors",
|
1823 |
"model.layers.74.self_attn.o_proj.weight_packed": "model-00008-of-00008.safetensors",
|
1824 |
"model.layers.74.self_attn.o_proj.weight_scale": "model-00008-of-00008.safetensors",
|
1825 |
"model.layers.74.self_attn.o_proj.weight_shape": "model-00008-of-00008.safetensors",
|
|
|
1829 |
"model.layers.74.self_attn.v_proj.weight_packed": "model-00008-of-00008.safetensors",
|
1830 |
"model.layers.74.self_attn.v_proj.weight_scale": "model-00008-of-00008.safetensors",
|
1831 |
"model.layers.74.self_attn.v_proj.weight_shape": "model-00008-of-00008.safetensors",
|
1832 |
+
"model.layers.74.self_attn.v_scale": "model-00007-of-00008.safetensors",
|
1833 |
"model.layers.75.input_layernorm.weight": "model-00008-of-00008.safetensors",
|
1834 |
"model.layers.75.mlp.down_proj.weight_packed": "model-00008-of-00008.safetensors",
|
1835 |
"model.layers.75.mlp.down_proj.weight_scale": "model-00008-of-00008.safetensors",
|
recipe.yaml
CHANGED
@@ -4,4 +4,4 @@ DEFAULT_stage:
|
|
4 |
ignore: [lm_head]
|
5 |
targets: Linear
|
6 |
scheme: W4A16
|
7 |
-
kv_cache_scheme: {num_bits: 8, type:
|
|
|
4 |
ignore: [lm_head]
|
5 |
targets: Linear
|
6 |
scheme: W4A16
|
7 |
+
kv_cache_scheme: {num_bits: 8, type: float, symmetric: true, strategy: tensor, dynamic: false}
|