Commit ac9be19 by sadkins65
Parent(s): 508abc6

Upload folder using huggingface_hub

Files changed (5):
  1. config.json +5 -5
  2. generation_config.json +1 -1
  3. model.safetensors +2 -2
  4. recipe.yaml +3 -3
  5. tokenizer.json +1 -0
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/network/sadkins/llama1.1b_W4A16_channel_compressed",
+  "_name_or_path": "/root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-intermediate-step-1431k-3T/snapshots/036fa4651240b9a1487f709833b9e4b96b4c1574",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -29,7 +29,7 @@
       }
     },
     "format": "pack-quantized",
-    "global_compression_ratio": 2.2221179821028847,
+    "global_compression_ratio": 1.891791164021256,
     "ignore": [
       "lm_head"
     ],
@@ -38,7 +38,7 @@
     },
     "sparsity_config": {
       "format": "dense",
-      "global_sparsity": 19.09593405666055,
+      "global_sparsity": 19.098103233975568,
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured"
     }
@@ -58,8 +58,8 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.39.0",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.0",
   "use_cache": true,
   "vocab_size": 32000
 }
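One practical consequence of the config change: with "torch_dtype": "bfloat16", loading with torch_dtype="auto" now materializes bfloat16 tensors rather than float32. A minimal loading sketch, assuming a transformers build whose compressed-tensors integration can decompress the pack-quantized format; model_id is a placeholder, not part of this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "."  # placeholder: a local checkout of this repo, or its Hub repo id

# torch_dtype="auto" defers to the "torch_dtype" entry in config.json (bfloat16 here)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)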
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
-  "transformers_version": "4.39.0"
+  "transformers_version": "4.40.0"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0e68908a6e7a66b8402436e489d5f439163752faa77f5ed61fd095728ef0ce4
-size 1011146888
+oid sha256:926e8c101497f34c5268f8958da0a2e65770e45664fed12838c0c9a495ef0cba
+size 747618312
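For scale, the repacked checkpoint is roughly a quarter smaller. A quick sanity check on the two byte counts from the LFS pointer (plain arithmetic, nothing assumed beyond the sizes above):

old_size = 1_011_146_888  # bytes, previous model.safetensors
new_size = 747_618_312    # bytes, after this commit

print(f"{old_size / 2**20:.0f} MiB -> {new_size / 2**20:.0f} MiB "
      f"({1 - new_size / old_size:.0%} smaller)")
# prints: 964 MiB -> 713 MiB (26% smaller)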
recipe.yaml CHANGED
@@ -1,9 +1,9 @@
-test_stage:
+quant_stage:
   quant_modifiers:
-    vLLMQuantizationModifier:
+    GPTQModifier:
+      sequential_update: false
       ignore: [lm_head]
       config_groups:
         group_0:
           weights: {num_bits: 4, type: int, symmetric: true, strategy: channel}
           targets: [Linear]
-    SparseGPTModifier: {sparsity: 0.0, quantize: true, sequential_update: false}
 
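The rewritten recipe has the shape consumed by llm-compressor's one-shot flow, where a single GPTQModifier takes over the roles the old vLLMQuantizationModifier/SparseGPTModifier pair split between them. A sketch of how such a recipe is typically applied, assuming llm-compressor is installed; the dataset, calibration sample count, and output directory are illustrative assumptions, not values recorded in this commit:

from llmcompressor.transformers import oneshot

oneshot(
    model="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    recipe="recipe.yaml",                  # the W4A16 channel-wise recipe above
    dataset="open_platypus",               # assumed calibration dataset
    num_calibration_samples=512,           # assumed
    max_seq_length=2048,
    output_dir="llama1.1b_W4A16_channel_compressed",  # assumed, echoes the old path name
)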
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
   "end_of_word_suffix": null,
   "fuse_unk": true,
   "byte_fallback": true,
+  "ignore_merges": false,
   "vocab": {
     "<unk>": 0,
     "<s>": 1,