stan-hua committed
Commit 5bfd333
1 parent: 1f93827

Push folder to HuggingFace Hub

config.json CHANGED
@@ -6,7 +6,24 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "compression_config": {
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 28672,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "quantization_config": {
     "config_groups": {
       "group_0": {
         "input_activations": null,
@@ -29,7 +46,7 @@
       }
     },
     "format": "pack-quantized",
-    "global_compression_ratio": 1.9030208009670313,
+    "global_compression_ratio": 1.90302080160764,
     "ignore": [
       "lm_head"
     ],
@@ -43,28 +60,11 @@
       "observer_kwargs": {},
       "strategy": "tensor",
       "symmetric": true,
-      "type": "int"
+      "type": "float"
     },
     "quant_method": "compressed-tensors",
     "quantization_status": "compressed"
   },
-  "eos_token_id": [
-    128001,
-    128008,
-    128009
-  ],
-  "head_dim": 128,
-  "hidden_act": "silu",
-  "hidden_size": 8192,
-  "initializer_range": 0.02,
-  "intermediate_size": 28672,
-  "max_position_embeddings": 131072,
-  "mlp_bias": false,
-  "model_type": "llama",
-  "num_attention_heads": 64,
-  "num_hidden_layers": 80,
-  "num_key_value_heads": 8,
-  "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "factor": 8.0,
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5b802d430c85e53c7fb93a310132aaf3107670b5f6c23aab224f1bfe3ea6cc7c
+ oid sha256:1f78e8be929613c66e6e58dff2b658fbc990a8e2efd2b698b0495274ab0ffe19
 size 4947660132
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5605b43be0b01e79edbd29f76ccad1aadff2ae514d28666c4e9a77a47e08c6b7
+ oid sha256:95a6b35c2cd34f2b13ccbe9cc1f0c477ae80d7ecc0254bd5471d63d781101378
 size 4974575684
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:62d28dd8464a3d04f5ddb99790b49b4184e8d186e913f23faafab8f522555a25
- size 4974608772
+ oid sha256:ad932fe2275531be72bbda787acd2a2656435ed7a48c4e4acfe41d971fff614a
+ size 4974608984
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1bd2e4a5a006ae4d6ddf411610fe91d9de6c0ad1322de5bdc0a8fd256ca308da
- size 4931323320
+ oid sha256:e0eb324a0d91e44718d5afacf76f9e657052bb453bd071b417e2010c9f438562
+ size 4931323108
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:defb637e131e8b15e2c48cdee3fbc8abacd5f3e3fbdbb8c0ed2e25eeed152321
+ oid sha256:5066e4b9ec3627103a9d7b4f41ac8e3a4d414fb340ee040fa6c51490196bce2c
 size 4974575772
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:4abcd27dfaa1af1f73d40601f853d259e0a0de1a4934c99ca63dcbf270402f67
+ oid sha256:fdf3146096073861f9e51c43c0fee6f94c68075d4b2dde9bf1f223b04c09ae7d
 size 4974575772
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d7ac364296bd50ee3665a7fe0fa6c92491b4ee1b9fb4fa6051a0bc446c594a03
- size 4974608772
+ oid sha256:b7a6e97a47ad23f785d24ec1e1d3a7f4a05a840bd68f5bcccd9d124eacca0820
+ size 4974608984
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:cd11819a01bb4239fceb1444568b3c020f0c433dfc1a6da8a4db524999b4744d
- size 4748707216
+ oid sha256:7bf47604d3b1484336f60ebb5e11ff0b63962fc79e372e6114a85a7ff30077ef
+ size 4748707004
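
The .safetensors entries are Git LFS pointers, so each diff just swaps the SHA-256 object id (and, for the re-packed shards 3, 4, 7, and 8, the byte size). A small sketch for verifying that a downloaded shard matches its pointer, using the oid from the first shard above:

    import hashlib

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        # Stream the file so multi-GB shards never need to fit in memory.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            while block := f.read(chunk_size):
                h.update(block)
        return h.hexdigest()

    expected = "1f78e8be929613c66e6e58dff2b658fbc990a8e2efd2b698b0495274ab0ffe19"
    assert sha256_of("model-00001-of-00008.safetensors") == expected
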
model.safetensors.index.json CHANGED
@@ -569,7 +569,7 @@
   "model.layers.29.self_attn.k_proj.weight_packed": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.k_proj.weight_shape": "model-00004-of-00008.safetensors",
-  "model.layers.29.self_attn.k_scale": "model-00004-of-00008.safetensors",
+  "model.layers.29.self_attn.k_scale": "model-00003-of-00008.safetensors",
   "model.layers.29.self_attn.o_proj.weight_packed": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.o_proj.weight_scale": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.o_proj.weight_shape": "model-00004-of-00008.safetensors",
@@ -579,7 +579,7 @@
   "model.layers.29.self_attn.v_proj.weight_packed": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
   "model.layers.29.self_attn.v_proj.weight_shape": "model-00004-of-00008.safetensors",
-  "model.layers.29.self_attn.v_scale": "model-00004-of-00008.safetensors",
+  "model.layers.29.self_attn.v_scale": "model-00003-of-00008.safetensors",
   "model.layers.3.input_layernorm.weight": "model-00001-of-00008.safetensors",
   "model.layers.3.mlp.down_proj.weight_packed": "model-00001-of-00008.safetensors",
   "model.layers.3.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
@@ -1819,7 +1819,7 @@
   "model.layers.74.self_attn.k_proj.weight_packed": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.k_proj.weight_scale": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.k_proj.weight_shape": "model-00008-of-00008.safetensors",
-  "model.layers.74.self_attn.k_scale": "model-00008-of-00008.safetensors",
+  "model.layers.74.self_attn.k_scale": "model-00007-of-00008.safetensors",
   "model.layers.74.self_attn.o_proj.weight_packed": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.o_proj.weight_scale": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.o_proj.weight_shape": "model-00008-of-00008.safetensors",
@@ -1829,7 +1829,7 @@
   "model.layers.74.self_attn.v_proj.weight_packed": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.v_proj.weight_scale": "model-00008-of-00008.safetensors",
   "model.layers.74.self_attn.v_proj.weight_shape": "model-00008-of-00008.safetensors",
-  "model.layers.74.self_attn.v_scale": "model-00008-of-00008.safetensors",
+  "model.layers.74.self_attn.v_scale": "model-00007-of-00008.safetensors",
   "model.layers.75.input_layernorm.weight": "model-00008-of-00008.safetensors",
   "model.layers.75.mlp.down_proj.weight_packed": "model-00008-of-00008.safetensors",
   "model.layers.75.mlp.down_proj.weight_scale": "model-00008-of-00008.safetensors",
recipe.yaml CHANGED
@@ -4,4 +4,4 @@ DEFAULT_stage:
   ignore: [lm_head]
   targets: Linear
   scheme: W4A16
-  kv_cache_scheme: {num_bits: 8, type: int, symmetric: true, strategy: tensor, dynamic: false}
+  kv_cache_scheme: {num_bits: 8, type: float, symmetric: true, strategy: tensor, dynamic: false}
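
The recipe records how the checkpoint was produced: 4-bit weight / 16-bit activation (W4A16) quantization of every Linear module except lm_head, plus a static, symmetric, per-tensor 8-bit KV-cache scheme that this commit switches from int (INT8) to float (FP8). A sketch of applying such a recipe with llm-compressor's oneshot entry point; the model id, dataset, and calibration settings are illustrative assumptions, and the exact import path can differ between llm-compressor releases:

    from llmcompressor.transformers import oneshot

    oneshot(
        model="meta-llama/Llama-3.1-70B-Instruct",  # assumed base model, not stated in this commit
        dataset="open_platypus",                    # illustrative calibration set
        recipe="recipe.yaml",                       # the recipe shown above
        output_dir="./llama-w4a16-fp8-kv",
        max_seq_length=2048,
        num_calibration_samples=512,
    )
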