stan-hua committed
Commit 5bfd333
1 parent: 1f93827

Push folder to HuggingFace Hub

config.json CHANGED
@@ -6,7 +6,24 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "compression_config": {
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 28672,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "quantization_config": {
     "config_groups": {
       "group_0": {
         "input_activations": null,
@@ -29,7 +46,7 @@
       }
     },
     "format": "pack-quantized",
-    "global_compression_ratio": 1.9030208009670313,
+    "global_compression_ratio": 1.90302080160764,
     "ignore": [
       "lm_head"
     ],
@@ -43,28 +60,11 @@
       "observer_kwargs": {},
       "strategy": "tensor",
       "symmetric": true,
-      "type": "int"
+      "type": "float"
     },
     "quant_method": "compressed-tensors",
     "quantization_status": "compressed"
   },
-  "eos_token_id": [
-    128001,
-    128008,
-    128009
-  ],
-  "head_dim": 128,
-  "hidden_act": "silu",
-  "hidden_size": 8192,
-  "initializer_range": 0.02,
-  "intermediate_size": 28672,
-  "max_position_embeddings": 131072,
-  "mlp_bias": false,
-  "model_type": "llama",
-  "num_attention_heads": 64,
-  "num_hidden_layers": 80,
-  "num_key_value_heads": 8,
-  "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "factor": 8.0,
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5b802d430c85e53c7fb93a310132aaf3107670b5f6c23aab224f1bfe3ea6cc7c
+ oid sha256:1f78e8be929613c66e6e58dff2b658fbc990a8e2efd2b698b0495274ab0ffe19
 size 4947660132
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5605b43be0b01e79edbd29f76ccad1aadff2ae514d28666c4e9a77a47e08c6b7
+ oid sha256:95a6b35c2cd34f2b13ccbe9cc1f0c477ae80d7ecc0254bd5471d63d781101378
 size 4974575684
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:62d28dd8464a3d04f5ddb99790b49b4184e8d186e913f23faafab8f522555a25
- size 4974608772
+ oid sha256:ad932fe2275531be72bbda787acd2a2656435ed7a48c4e4acfe41d971fff614a
+ size 4974608984
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1bd2e4a5a006ae4d6ddf411610fe91d9de6c0ad1322de5bdc0a8fd256ca308da
- size 4931323320
+ oid sha256:e0eb324a0d91e44718d5afacf76f9e657052bb453bd071b417e2010c9f438562
+ size 4931323108
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:defb637e131e8b15e2c48cdee3fbc8abacd5f3e3fbdbb8c0ed2e25eeed152321
+ oid sha256:5066e4b9ec3627103a9d7b4f41ac8e3a4d414fb340ee040fa6c51490196bce2c
 size 4974575772
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:4abcd27dfaa1af1f73d40601f853d259e0a0de1a4934c99ca63dcbf270402f67
+ oid sha256:fdf3146096073861f9e51c43c0fee6f94c68075d4b2dde9bf1f223b04c09ae7d
 size 4974575772
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d7ac364296bd50ee3665a7fe0fa6c92491b4ee1b9fb4fa6051a0bc446c594a03
- size 4974608772
+ oid sha256:b7a6e97a47ad23f785d24ec1e1d3a7f4a05a840bd68f5bcccd9d124eacca0820
+ size 4974608984
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:cd11819a01bb4239fceb1444568b3c020f0c433dfc1a6da8a4db524999b4744d
- size 4748707216
+ oid sha256:7bf47604d3b1484336f60ebb5e11ff0b63962fc79e372e6114a85a7ff30077ef
+ size 4748707004
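
The .safetensors entries are Git LFS pointers, so each diff just swaps the SHA-256 object id (and, for the re-packed shards 3, 4, 7, and 8, the byte size). A small sketch for verifying that a downloaded shard matches its pointer, using the oid from the first shard above:

    import hashlib

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        # Stream the file so multi-GB shards never need to fit in memory.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            while block := f.read(chunk_size):
                h.update(block)
        return h.hexdigest()

    expected = "1f78e8be929613c66e6e58dff2b658fbc990a8e2efd2b698b0495274ab0ffe19"
    assert sha256_of("model-00001-of-00008.safetensors") == expected
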
model.safetensors.index.json CHANGED
@@ -569,7 +569,7 @@
   "model.layers.29.self_attn.k_proj.weight_packed": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.k_proj.weight_shape": "model-00004-of-00008.safetensors",
-  "model.layers.29.self_attn.k_scale": "model-00004-of-00008.safetensors",
+  "model.layers.29.self_attn.k_scale": "model-00003-of-00008.safetensors",
   "model.layers.29.self_attn.o_proj.weight_packed": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.o_proj.weight_scale": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.o_proj.weight_shape": "model-00004-of-00008.safetensors",
@@ -579,7 +579,7 @@
   "model.layers.29.self_attn.v_proj.weight_packed": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.v_proj.weight_shape": "model-00004-of-00008.safetensors",
-  "model.layers.29.self_attn.v_scale": "model-00004-of-00008.safetensors",
+  "model.layers.29.self_attn.v_scale": "model-00003-of-00008.safetensors",
   "model.layers.3.input_layernorm.weight": "model-00001-of-00008.safetensors",
   "model.layers.3.mlp.down_proj.weight_packed": "model-00001-of-00008.safetensors",
   "model.layers.3.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
@@ -1819,7 +1819,7 @@
   "model.layers.74.self_attn.k_proj.weight_packed": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.k_proj.weight_scale": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.k_proj.weight_shape": "model-00008-of-00008.safetensors",
-  "model.layers.74.self_attn.k_scale": "model-00008-of-00008.safetensors",
+  "model.layers.74.self_attn.k_scale": "model-00007-of-00008.safetensors",
   "model.layers.74.self_attn.o_proj.weight_packed": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.o_proj.weight_scale": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.o_proj.weight_shape": "model-00008-of-00008.safetensors",
@@ -1829,7 +1829,7 @@
   "model.layers.74.self_attn.v_proj.weight_packed": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.v_proj.weight_scale": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.v_proj.weight_shape": "model-00008-of-00008.safetensors",
-  "model.layers.74.self_attn.v_scale": "model-00008-of-00008.safetensors",
+  "model.layers.74.self_attn.v_scale": "model-00007-of-00008.safetensors",
   "model.layers.75.input_layernorm.weight": "model-00008-of-00008.safetensors",
   "model.layers.75.mlp.down_proj.weight_packed": "model-00008-of-00008.safetensors",
   "model.layers.75.mlp.down_proj.weight_scale": "model-00008-of-00008.safetensors",
recipe.yaml CHANGED
@@ -4,4 +4,4 @@ DEFAULT_stage:
   ignore: [lm_head]
   targets: Linear
   scheme: W4A16
-  kv_cache_scheme: {num_bits: 8, type: int, symmetric: true, strategy: tensor, dynamic: false}
+  kv_cache_scheme: {num_bits: 8, type: float, symmetric: true, strategy: tensor, dynamic: false}
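
The recipe records how the checkpoint was produced: 4-bit weight / 16-bit activation (W4A16) quantization of every Linear module except lm_head, plus a static, symmetric, per-tensor 8-bit KV-cache scheme that this commit switches from int (INT8) to float (FP8). A sketch of applying such a recipe with llm-compressor's oneshot entry point; the model id, dataset, and calibration settings are illustrative assumptions, and the exact import path can differ between llm-compressor releases:

    from llmcompressor.transformers import oneshot

    oneshot(
        model="meta-llama/Llama-3.1-70B-Instruct",  # assumed base model, not stated in this commit
        dataset="open_platypus",                    # illustrative calibration set
        recipe="recipe.yaml",                       # the recipe shown above
        output_dir="./llama-w4a16-fp8-kv",
        max_seq_length=2048,
        num_calibration_samples=512,
    )
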