diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..ef3ce1a7859ee3e04f6783c9524081e82ab8849e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+rubra-11b-h.png filter=lfs diff=lfs merge=lfs -text
diff --git a/cal_data.safetensors b/cal_data.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cd988e09571b1e2a570a608f858c02db1d47b325
--- /dev/null
+++ b/cal_data.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08be1103ff8fcef33b570f3c0f5ae4cc7f9dc5c3f264105baa55fc9b132ed1be
+size 1638488
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..081e6d0806e9eacfe14d2c2783d71ed6e6cd8717
--- /dev/null
+++ b/config.json
@@ -0,0 +1,26 @@
+{
+ "_name_or_path": "models/rubra-11b-h",
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.38.2",
+ "use_cache": false,
+ "vocab_size": 32000
+}
diff --git a/example1.png b/example1.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9f6f26a3ab7d0de6534adc2192454b2b1f92ecd
Binary files /dev/null and b/example1.png differ
diff --git a/example2.png b/example2.png
new file mode 100644
index 0000000000000000000000000000000000000000..1e0041d83e7c53413c66988c9bd3b5b5fa280b2f
Binary files /dev/null and b/example2.png differ
diff --git a/example3.png b/example3.png
new file mode 100644
index 0000000000000000000000000000000000000000..1ab190c3029d443abb020d670a5a7a44abe03f23
Binary files /dev/null and b/example3.png differ
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..81e4802e5819d7759c46acbe055c75e4b6d092c5
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,7 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.38.2",
+ "use_cache": false
+}
diff --git a/hidden_states.safetensors b/hidden_states.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae83db331829e64b1e119f3606a03f350dc9a2de
--- /dev/null
+++ b/hidden_states.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4318a74c06cf05ea33ea07878cc25d5d18876645a3fddf648720e6481defc27
+size 1677730376
diff --git a/job_new.json b/job_new.json
new file mode 100644
index 0000000000000000000000000000000000000000..92fc21a9d87cf699e00e4650d266e7d52fecca06
--- /dev/null
+++ b/job_new.json
@@ -0,0 +1,98825 @@
+{
+ "in_dir": "base_model",
+ "out_dir": "sanjay920/rubra-11b-h-EXL2",
+ "cal_dataset": "wikitext-test.parquet",
+ "bits": 5.0,
+ "dataset_rows": 100,
+ "measurement_rows": 16,
+ "length": 2048,
+ "measurement_length": 2048,
+ "head_bits": 6,
+ "shard_size": 8192,
+ "compile_full": null,
+ "rope_scale": null,
+ "rope_alpha": null,
+ "output_measurement": null,
+ "progress": "compile",
+ "cal_filename": "sanjay920/rubra-11b-h-EXL2/cal_data.safetensors",
+ "last_module_idx": 98,
+ "measurement": {
+ "model.layers.0.self_attn": [
+ {
+ "accuracy": 0.902508161496371,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9203556086868048,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9246767205186188,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955721165984869,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563947499264032,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568075467832386,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.967554040485993,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687355454079807,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9720487990416586,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9737953173462301,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780201958492398,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794629843672737,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802156471414492,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822191685670987,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892206196091138,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910724487854168,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913987399486359,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943463499366771,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976803997560637,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.0.mlp": [
+ {
+ "accuracy": 0.9123474769294262,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9179784115403891,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.932591964257881,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.937203103909269,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9628385086543858,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679891671985388,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975108077051118,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814978303038515,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832107973634265,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824423746322282,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846852865885012,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910700924519915,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924008402740583,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950336539186537,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954662031377666,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970542312948965,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985220660455525,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.self_attn": [
+ {
+ "accuracy": 0.8877861187793314,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8973407302983105,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.910242407117039,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9385041804052889,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.940526916179806,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9435215112753212,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9515076652169228,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955113283591345,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963577882386744,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9648914394201711,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9705673614516854,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9749045115895569,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721663881791756,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9771681335987523,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837308657588437,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887261725962162,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859461099258624,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945900982129388,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963489300280344,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.mlp": [
+ {
+ "accuracy": 0.9527104110457003,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563737579155713,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9567327841650695,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568935022689402,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930489940161351,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939982455398422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947059626574628,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970151603920385,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970192952168873,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996223299196572,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975017743272474,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980218067430542,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983664975079591,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998927123764588,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990109881728131,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991535865574406,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993296123193431,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.self_attn": [
+ {
+ "accuracy": 0.9912483744265046,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916435057821218,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923406311427243,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994128311634995,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952435296145268,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953209938539658,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969110778911272,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970286335446872,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972990020178258,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974499639647547,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974876438791398,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976576824410586,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979693320783554,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981310617731651,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988565697785816,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990829429698351,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991081247353577,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995378834355506,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997747411516684,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.mlp": [
+ {
+ "accuracy": 0.9892762480885722,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895454781362787,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912471331772394,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991794113710057,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947041008272208,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951269059383776,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958544310502475,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972624051006278,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975140962487785,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973148557328386,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976522701763315,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998632437454944,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988308300817152,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992609197433922,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993038972388604,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994499729345989,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998017110610817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.self_attn": [
+ {
+ "accuracy": 0.9899887884967029,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904003122937866,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915672297938727,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936683645646553,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944629863894079,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945710314495955,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955734857358038,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956779086787719,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961270387721015,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962668074440444,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971315091243014,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974506823491538,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997335236883373,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976795297843637,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985245664138347,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988046068392578,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987181324831909,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995058817348763,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996684330690186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.mlp": [
+ {
+ "accuracy": 0.9844154579914175,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847926673828624,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871780377579853,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879374770680442,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921912606514525,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928218296554405,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938397765217815,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959715837030672,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963449053175282,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960351230402011,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965353266888997,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997980712352728,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982754946904606,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989119462479721,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989720682242478,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991756232702755,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997108341558487,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.self_attn": [
+ {
+ "accuracy": 0.9940207607578486,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951273926417343,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978095898550237,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977955239373841,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976957584294723,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979161136798211,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977876108750934,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997963167646958,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997377075618715,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973086231620982,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998610572751204,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987351718518767,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986403053189861,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987657150850282,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992435346975981,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994336164018023,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992440667119808,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998596248324247,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998452412955885,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.mlp": [
+ {
+ "accuracy": 0.9953385644475929,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954606430546846,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961155957134906,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963309457525611,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976867814257275,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978648535179673,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981546685303329,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988047819279018,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989133847047924,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988297178279026,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989727466163458,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994054905910161,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994889207046072,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996792593592545,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996966957805853,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997519157095667,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9999101926136973,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.self_attn": [
+ {
+ "accuracy": 0.983027494745329,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988349880441092,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933699457033072,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933736611856148,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932925261673518,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959597528359154,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934077207872178,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996139597526053,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955893133592326,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955878545151791,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972478003473952,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997818165429635,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973545127431862,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977891548696789,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987297143379692,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999152827916987,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987308212730568,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997582192772825,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996598286616063,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.mlp": [
+ {
+ "accuracy": 0.9925510261964519,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992728061741218,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993811263149837,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941331359441392,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962669986125547,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965560747805284,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970157300849678,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980933387705591,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982620093869627,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981101373850834,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983399104385171,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990414018138836,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991743572754785,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994895925538003,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995117338021373,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995983660337515,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998637763601437,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.self_attn": [
+ {
+ "accuracy": 0.9853996349847876,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894411026616581,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904139981372282,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927006095531397,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937793352000881,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939195910119452,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951347391761374,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953175328264479,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995725197615684,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959138984268066,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967400987807196,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969654745509615,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969755911588436,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997284421682707,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982928356475895,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986039540162892,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985298825704376,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994038183504017,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996078892836522,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.mlp": [
+ {
+ "accuracy": 0.9796033757738769,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801610637223348,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834316545748152,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844096631277353,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898302512592636,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906708221533336,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99199359229533,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947536016406957,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995231583743589,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948356513341423,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954958254238591,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973706452437909,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977587403773214,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985758317998261,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986605149169918,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989306384450174,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996214626262372,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.self_attn": [
+ {
+ "accuracy": 0.9842456089681946,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847515997826122,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862019201391377,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989355675119441,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918334566464182,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919354065787047,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939575470634736,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941131755767856,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950063232099637,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995209871471161,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958428621175699,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962687853985699,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962730693951016,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966743008699268,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979651567628025,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982993875746615,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998358772485517,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992254695025622,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995699401733873,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.mlp": [
+ {
+ "accuracy": 0.974171947222203,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748882604762912,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789550169371068,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801530737895519,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871387034072541,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881842365139164,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898216370493174,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933750616037287,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939624306280166,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934718561416958,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994298821548,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966810355253983,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99716517124034,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982046157965669,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983098571392475,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998642562663008,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995165277887281,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.self_attn": [
+ {
+ "accuracy": 0.9817969363648444,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826170109445229,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842383282957599,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987987891305238,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902089073439129,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905349539476447,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925528938765638,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929857852112036,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941390909953043,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943753193656448,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953846742282622,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957563256757567,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958568579750136,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962565110181458,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977382431097794,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981046210305067,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982187150235404,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990283438819461,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995301622984698,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.mlp": [
+ {
+ "accuracy": 0.9699697830947116,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9707644692389295,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758064048364758,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772977469256148,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850295406067744,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862511573592201,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882942370604724,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922574244847056,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929685922397766,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924017082084902,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933631024032366,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961383405752713,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966958504082868,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979089612534153,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980363582653808,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984643486241112,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994418839887658,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.self_attn": [
+ {
+ "accuracy": 0.9762045053066686,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977105101919733,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792947630630806,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844983145594597,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880177224404179,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882629308849573,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916761830099858,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919323883077595,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927965526585467,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931856998300646,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940749995876104,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945208916324191,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947839853120968,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952225348097272,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970656104414957,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975598896999145,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978074637183454,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986692166348803,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994058420306828,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.mlp": [
+ {
+ "accuracy": 0.9653462056303397,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966274723643437,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9718257325002924,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734515016898513,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827394402818754,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841620186925866,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863689955091104,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910840421216562,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918851483380422,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912260650889948,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923439234553371,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955300039000576,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996181691181846,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975773043261142,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977210198558168,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981782444083365,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993462364582228,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.self_attn": [
+ {
+ "accuracy": 0.9896986646344885,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898966330802068,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959875158820068,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959419664373854,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958830689574825,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994527042581467,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958289333299035,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945835779071786,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961835095891729,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961431270785397,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976346265902976,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984772361494834,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976802616729401,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998478227716987,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987404754647287,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989050509684603,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987427304295124,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999758682492029,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996692690583586,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.mlp": [
+ {
+ "accuracy": 0.990279221732635,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905298211961053,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920168473036028,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924921841884498,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951802256982774,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955459609918762,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961894396110438,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975046989129623,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977252853277605,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975598426244687,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978572657200857,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987597939252737,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989334050405887,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993266929595848,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993670692056185,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994898942059081,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998092828209337,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.self_attn": [
+ {
+ "accuracy": 0.9863239590195008,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868265291443095,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921606028510723,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919705191277899,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919389690621756,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992855364602292,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919611498771701,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929512560192961,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936951973941177,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936885720526334,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966282595996745,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969246424443554,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966480978473555,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969086252094712,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981692790461238,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984889770348673,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981699131167261,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995570989412954,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999439229904965,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.mlp": [
+ {
+ "accuracy": 0.9847793944063596,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985123383696191,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875411554821767,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882662810268812,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923551889369264,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929447612666991,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939775222155731,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996076546041877,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964278059342178,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961227821040666,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965966620657127,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980314106360311,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983075839991216,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989473513051053,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989980212376395,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991948430506454,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999716363789048,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.self_attn": [
+ {
+ "accuracy": 0.9758125139633194,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766555116511881,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786176779307425,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845216747489758,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873013857286423,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874568734667264,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911378039978445,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912875468144193,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920827563619241,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928605046588928,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936999409110285,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940654404636007,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942948371754028,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947681912162807,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968999695556704,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974256125715328,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977206327166641,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986770139730652,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993911658057186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.mlp": [
+ {
+ "accuracy": 0.9619943019933999,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630233785137534,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9692408089758828,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9710238851839676,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810652892338112,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826576914638281,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851201827987097,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902692650794052,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911376674135681,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990367868449539,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916139839915559,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950910237093922,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958211112534627,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973492388962768,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974947760347277,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979947993706446,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992910219734767,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.self_attn": [
+ {
+ "accuracy": 0.9732593579683453,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9740604794351384,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758571569109336,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981850401032716,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858934246003628,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859436851111241,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901865926804021,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901829248992726,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916712933336385,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991981672210386,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932556867133826,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936436033167411,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937982890987769,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943627052416559,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966125944047235,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971977410459658,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975617044838145,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998476260661846,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993510511812929,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.mlp": [
+ {
+ "accuracy": 0.9590617874637246,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9601758192293346,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9669503723271191,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689558737445623,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794857824454084,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812142355367541,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839909761212766,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894312581745908,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903528640279546,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895940163987689,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909067719127052,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946883149386849,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954700223461259,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997104218302411,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972932919627056,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997851840693329,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992196869279724,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.self_attn": [
+ {
+ "accuracy": 0.9674572005169466,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686749550746754,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9713187958113849,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791675666347146,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834151559043676,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835956042516045,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892363072722219,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894226833130233,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904058027314022,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911344906722661,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915743695746642,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922297771845479,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926136705034878,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931706467177719,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959881600225344,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965825928520644,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972190943808528,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980772634007735,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992562690567866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.mlp": [
+ {
+ "accuracy": 0.9578170392196625,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9590193158946931,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663224390242249,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9685161245288327,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788233999861404,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806237743468955,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836092637851834,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889325238764286,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900051793083549,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892134199035354,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906160493264906,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944984828180168,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953196261485573,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969970610691234,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997200449812226,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978280807699775,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991946576228656,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.self_attn": [
+ {
+ "accuracy": 0.9639496663585305,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9654018925502896,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679478466277942,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765551248565316,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810993222054094,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812793986639008,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867310639237985,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869522373774089,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879012388410047,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883594005950727,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903323815669864,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910267862142064,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912635658401996,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921826082281768,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951284613925964,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960675196198281,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962311515701003,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977348528045695,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990211970216478,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.mlp": [
+ {
+ "accuracy": 0.9561710001435131,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9574132142588496,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9650822830153629,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9674224474001676,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779465935425833,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798049030359834,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829885881626979,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884562431252562,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895629284437746,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887702904525213,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902152858558111,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942774242081214,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995120030187536,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968639670987613,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970916908496292,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977569706679787,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991551881867053,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.self_attn": [
+ {
+ "accuracy": 0.9918902807403356,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919913714402355,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941640416509472,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994114655040903,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943733899563085,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995450929418439,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945171528088395,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954713895567693,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960881019214867,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961418636376038,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970261527196271,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997912951730541,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969646203535376,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978629015240585,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983468192367582,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987805694254348,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983534378916374,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996761174661515,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995716315679601,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.mlp": [
+ {
+ "accuracy": 0.9872070293640718,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875441331532784,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897534946794622,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904376343474723,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936092627176549,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941051893692929,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950599781586789,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966741525713587,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969771623436827,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967607702419627,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971554788498906,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983539172244491,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985822996895877,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999102601341292,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999159805731324,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993414144846611,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997426053332674,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.self_attn": [
+ {
+ "accuracy": 0.979622381972149,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979827641043812,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915289613127243,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915508964331821,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917960677703377,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99220766252256,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919130146445241,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920798306411598,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899292784975842,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899077572044916,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952546581625938,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955962051753886,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952810344693717,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956986843317281,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968346282839775,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975842618150637,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968424138351111,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993985197106667,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992879528981575,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.mlp": [
+ {
+ "accuracy": 0.9795986886601895,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800954010570422,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836475889314897,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847203819663264,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897230201167986,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905139947659336,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920523978071287,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946800437464844,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951683254330419,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947878780949395,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954185747192241,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973539462371264,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977197637344943,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985762029973557,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998654469876783,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989491958876897,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996122916345485,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.self_attn": [
+ {
+ "accuracy": 0.9591868193820119,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9606455501634628,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963689126772806,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9717553314985707,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779619486071169,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9790570001350716,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839622974977829,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852984003955498,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869856234290637,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874973931582645,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891311423270963,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899767622118816,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899715750943869,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910780027857982,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943656724644825,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953498630784452,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956311181304045,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976474752329523,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988193828030489,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.mlp": [
+ {
+ "accuracy": 0.9518720533233136,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9533123800065368,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9613713058643043,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9637906108982861,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758038044674322,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977887489949353,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812068799510598,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873612260562368,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885673976968974,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876663720351644,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892762696254067,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937061189557426,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946411842829548,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965408335992834,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967898455652175,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974644881876884,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999061718754092,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.self_attn": [
+ {
+ "accuracy": 0.9581109315622598,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9596416996791959,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630198783706874,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733511302620173,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9787798321340233,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791236850433052,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867767564137466,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872424571658485,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881223333068192,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891258522984572,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893495296710171,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899621973163448,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904887008597143,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912755576951895,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994794098805869,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956135836691828,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996461629998521,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975838983955327,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990680614864687,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.mlp": [
+ {
+ "accuracy": 0.949402768863365,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9508629126939923,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9593443237245083,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619821181986481,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744504160480574,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766377885825932,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980175971868448,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865899439901114,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878867646912113,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869770252262242,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886702921357937,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933583022502717,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943471373990178,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996348384549492,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966181806812529,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973575899493881,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999004362798587,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.self_attn": [
+ {
+ "accuracy": 0.9531727249268442,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9553546705283225,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9591875285841525,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703965260414407,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9756896772887558,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9762924946844578,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842725213966332,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850752123165876,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985899701656308,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863111876766197,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987529011850711,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885381097556092,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890030458336696,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899866348132491,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938212371780537,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994938170624664,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954652949527372,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972972613177262,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988004061815445,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.mlp": [
+ {
+ "accuracy": 0.9480401042383164,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.949517953209579,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9579005774576217,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9605092275887728,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9739315117476508,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761737691005692,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796711904928088,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861559904529713,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875617006327957,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866939535131678,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884115278837271,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931988872704096,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942056277068332,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962179714348167,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965195794648025,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972543855546974,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988709868557635,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.self_attn": [
+ {
+ "accuracy": 0.9528840403072536,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9542537578381598,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9580731589812785,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668758142506704,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9747469594003633,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754544387105852,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98189686704427,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828780224779621,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843287441181019,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857511474983767,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870679471641779,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881277907988988,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880640183691867,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891430772840977,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934388622350525,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947039625258185,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948006026097573,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971943795535481,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985773553344188,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.mlp": [
+ {
+ "accuracy": 0.9452786394394934,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9468083463143557,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9557567997835577,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9585219516884536,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725894997827709,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748451801715419,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785158903105184,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856567675014958,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869674782385118,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860588647425175,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878197305952199,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928987653984223,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939260903047398,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960985295183491,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963896564295283,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971529023896437,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989376296944101,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.self_attn": [
+ {
+ "accuracy": 0.9910135175450705,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912824938655831,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941338914213702,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941643851052504,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945628636050969,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953863266273402,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948620661452878,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955346850911155,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961977519269567,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962104187288787,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976131010480458,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973233057680773,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976833624823485,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997326233657077,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980814326991094,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988377549670986,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980935500643682,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996226447292429,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994861224859051,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.mlp": [
+ {
+ "accuracy": 0.9842658781562932,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846874004579149,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872097237966955,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880227182293311,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921422847255599,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927411952521652,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993845232820604,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959024451673031,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962649137887638,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960189153935062,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964987884595757,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997976030492282,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982532844805974,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988919206953142,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989660839673888,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991774850786896,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996749793717754,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.self_attn": [
+ {
+ "accuracy": 0.9837653411086649,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840333891916089,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904419900849462,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904393116594292,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907222538604401,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885966366273351,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906790258246474,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888943562982604,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991739244927885,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916874898481183,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955156600044575,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954305627325084,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955371092073619,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954360554984305,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968435579503421,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997513074951712,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968456693313783,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993884474752122,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992451430443907,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.mlp": [
+ {
+ "accuracy": 0.9759478892665356,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765194484498352,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803963751764968,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815706800436601,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878923632204533,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888117971713655,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904785320395604,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937392670253757,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994295743497787,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938581893220544,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99459478398785,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996881335915532,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973090803541709,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983202805378824,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984141130771604,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987336630001664,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995366168823239,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.self_attn": [
+ {
+ "accuracy": 0.9556966400705278,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9570334849413484,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.960812549572438,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703220267547294,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772858648793772,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778754758881405,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854066136176698,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862237990018912,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869342013844289,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879776879679412,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887068463722244,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893193228635937,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898968231282197,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907162148156203,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942090424301568,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953937472309917,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957477512361947,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974927010043757,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988711644837167,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.mlp": [
+ {
+ "accuracy": 0.939760722219944,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9414845332503319,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9511478268541396,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.954194939462468,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698484404943883,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972340663196519,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9763197798747569,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841194238979369,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855774453608319,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846697235479951,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866198089439422,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921901000780053,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933257188240532,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956658300361596,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960375506052515,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968587130861124,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998800835244765,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.self_attn": [
+ {
+ "accuracy": 0.9503053929656744,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9525580331683159,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9565810647327453,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9682650562608615,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738911159802228,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9746871701208875,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981898903963156,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829692101338878,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843669487163424,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848410444683395,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867640393786132,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879159886040725,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881031874101609,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892810968449339,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934559455141425,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945060422469396,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950925561715849,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970777546841418,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986730054151849,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.mlp": [
+ {
+ "accuracy": 0.9339546884875745,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9359053233638406,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9467246429994702,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9503237595781684,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668553009396419,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9696119821164757,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9741377918981016,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823939295019954,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840543393511325,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831261212239042,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985276190796867,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913982859579846,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926482754817698,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951997506432235,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956310951092746,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966006540635135,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986620795898489,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.self_attn": [
+ {
+ "accuracy": 0.950924233533442,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9526217842940241,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9560005511157215,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9651925785001367,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.974756367970258,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754410153254867,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821089534088969,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829862234182656,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833339191973209,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847710862522945,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871181106427684,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879768814425915,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882259144214913,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893726909067482,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922168490593322,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943546110007446,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933991891448386,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969799995742505,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998169852187857,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.mlp": [
+ {
+ "accuracy": 0.930072165094316,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9321989654563367,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9437624660786241,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9476719659287483,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649747095536441,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678829013137147,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972721746424213,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812747784890234,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830624930327758,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821682657347992,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844429356744513,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909043678781018,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992228524555685,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948874444817193,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953900648106355,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996433951149811,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998560123873176,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.self_attn": [
+ {
+ "accuracy": 0.9506571341771632,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.95194994029589,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9554108332376927,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965399626409635,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973783713998273,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9746870229719207,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815001173410565,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827296736184508,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836740818573162,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849110426730476,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869808834628202,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881130746216513,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988304317521397,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895301525830291,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934029994474258,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946216194075532,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949501673690975,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971529373142403,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986130048564519,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.mlp": [
+ {
+ "accuracy": 0.9300565491430461,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9322172533720732,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9436244373209774,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9475068859755993,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649612298235297,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679478236939758,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9727855251403525,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812858692603186,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830675406847149,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821449116570875,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844449243973941,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908643537783064,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922332215355709,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948491686081979,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995345929695759,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963671152072493,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985312398639508,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.self_attn": [
+ {
+ "accuracy": 0.9852219514432363,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854540597880259,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992581088328734,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925188558117952,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992044290585909,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908394656376913,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921488391701132,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909024515654892,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931627179321367,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933414816041477,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962164734170074,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975688391277799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962678278825479,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975411284103757,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981369049564819,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986740430613281,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981451820058282,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995850489922304,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994992236806866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.mlp": [
+ {
+ "accuracy": 0.9810774835059419,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816042414167896,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984690910903737,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857331512612291,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99059497367125,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913256106083281,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926997422298882,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950385903939605,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954885762417689,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952267833868973,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958167897420935,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975691078143427,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979071016423404,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986456794285914,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987542949966155,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990193766789162,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995884474592458,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.self_attn": [
+ {
+ "accuracy": 0.9801707121077925,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803194139385596,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864167625200935,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861038540257141,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867335963645019,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873976144008338,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986821249302011,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876039975206368,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904251811094582,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903880580095574,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943270806106739,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950134730897844,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943478996865451,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949842338974122,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966566005459754,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976610257581342,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966578336025123,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993154952098848,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991263385672937,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.mlp": [
+ {
+ "accuracy": 0.9730628487886861,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973774635582231,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779872408835217,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792550611309707,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865127976518124,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875629763700999,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893352640210651,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929699986823834,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993587943376042,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931577415263746,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939938187017106,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965159249695716,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970039997715503,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980992393029737,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982241835823515,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985617383063072,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99946317839931,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.self_attn": [
+ {
+ "accuracy": 0.960547131486237,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619710550177842,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9641266402322799,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726306177908555,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774892054265365,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978482106118463,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831622139317915,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843430254259147,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861415453488007,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869873017305508,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988334433583077,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893150111893192,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892438768292777,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901899283868261,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938217146846,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951323153509293,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948282100085635,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976286789024016,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985810204962036,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.mlp": [
+ {
+ "accuracy": 0.9314752910286188,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9334497996605933,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9442711595911533,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9477534026373178,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658016683533788,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686134352814406,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731169363949448,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820591834140942,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836634104140103,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826249615289271,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984836339019239,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911455910187215,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924397784052417,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951095145952422,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954999905312434,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996416717855027,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986326706202817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.self_attn": [
+ {
+ "accuracy": 0.9617549669928849,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9633937487378716,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656938512343913,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973594733630307,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796587142627686,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803864565910771,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848732578684576,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858064573490992,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872101178625599,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877110663801432,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895211174734868,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901978281559423,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903222138527781,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910594234825112,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943284161563497,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953842611867003,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952440769411623,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976959823688958,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986929383812821,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.mlp": [
+ {
+ "accuracy": 0.9317440665327013,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9337658795993775,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9442870973143727,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9475885890424252,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9661285383626819,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.968816627166234,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731064232764766,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823057807516307,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838297074311413,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828014142112806,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849381670355797,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912480839411728,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925001342198811,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951820038841106,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955530187871773,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964022201456828,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986571829067543,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.self_attn": [
+ {
+ "accuracy": 0.9678770737955347,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691678545204923,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9709918311564252,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765152925392613,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807526028016582,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820293172961101,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841885701171122,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855642172624357,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866925096139312,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870485090650618,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897773859556764,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902223890530877,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902153389411978,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909679422853515,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942741809645668,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955411965493113,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948284650454298,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980486317217583,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986889937572414,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.mlp": [
+ {
+ "accuracy": 0.932304578833282,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342344424221665,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9444406803231686,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.947586020687595,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662447790615261,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689184916205704,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731233624042943,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98242219875101,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839446639525704,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828673212323338,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849894531071186,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912908738479018,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925277467991691,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952316170674749,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955659814586397,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964031476119999,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986658465059008,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.self_attn": [
+ {
+ "accuracy": 0.9655442168004811,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9671692294068635,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9697359376586974,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761799619300291,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803578312275931,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814343429170549,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843596651917323,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858793318271637,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869456980959512,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874889963539317,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896778290858492,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908777676173486,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990540920291096,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991681206272915,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948772302595899,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958305972686503,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956192086392548,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997941504814662,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988134019586141,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.mlp": [
+ {
+ "accuracy": 0.9325818351935595,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9344900948926806,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443744430318475,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474421949125826,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663861433509737,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9690310031874105,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731040432816371,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824838677886873,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839952855254523,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829381878953427,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850510018295608,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913280457840301,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925561861309689,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952473715238739,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995588112360565,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963884983881144,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986808212925098,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.self_attn": [
+ {
+ "accuracy": 0.9875456623267382,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879995033843443,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943620861449745,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943356119911186,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945071052061394,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966131598775974,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945579385675956,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965798364137299,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963930066151079,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963805679144571,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978544423211133,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978882907453226,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979100275522796,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979497697204351,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985688193410169,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992296196833195,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985732258646749,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997000065504835,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999592912312437,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.mlp": [
+ {
+ "accuracy": 0.9806502945721149,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981183686060831,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839521977119148,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848100668168627,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903757648426108,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911127613158897,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922769031254575,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950082659197506,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954419361311011,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951236607448664,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957144900981802,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975175310974009,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978603233030299,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986405047020526,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987238197063562,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989435812094598,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995850031682494,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.self_attn": [
+ {
+ "accuracy": 0.9871479421271943,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880682986695319,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990131174272392,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898401049431413,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900419003679417,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925147893663961,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990109168633353,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992998635803815,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936714109790046,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937381937343162,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962826371920528,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965584015008062,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964808065269608,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966537497966783,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974247746868059,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984650467449683,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99742937242263,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999517957230637,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992533045333403,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.mlp": [
+ {
+ "accuracy": 0.9704249275382608,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9712161585921422,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754329854622483,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766340904170647,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985306327464059,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864371244912036,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881206953432411,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924467977543827,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930931831186172,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925579508999363,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934620868298225,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962237248546444,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967452616401715,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979627527136472,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998072977818083,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983912546158535,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994254562079732,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.self_attn": [
+ {
+ "accuracy": 0.965649431804195,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668996206019074,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9694499158067629,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9755590457934886,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807646207045764,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981554796686396,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854566292488016,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863431870471686,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881037988234311,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988736093393527,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904858563677408,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914403361617588,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915653750649653,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924774839309976,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99530891442555,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961586743447697,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996117347778636,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978859801776707,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989366781519493,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.mlp": [
+ {
+ "accuracy": 0.9328135452233255,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9347493099048734,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9444518794771284,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474378717131913,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966479453491047,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691691853804514,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731421418255195,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982578118913807,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840653012506664,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829857222503051,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851169427274726,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913530095946044,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925886914716102,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952522270032205,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956026887812186,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963736499339575,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986830660418491,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.self_attn": [
+ {
+ "accuracy": 0.9632973142433912,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647458797553554,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9677944149589166,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744596001692116,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9805283090099692,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815037929220125,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855152769596316,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868621388450265,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877731337910518,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881437398144044,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905192241421901,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911876061523799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915461755299475,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924024375504814,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948597592592705,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961402852204628,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957100535684731,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979259906467632,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988766744427267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.mlp": [
+ {
+ "accuracy": 0.9317238796502352,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9336495161987841,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9432325733359903,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462370390538126,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965948719996959,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686132016358897,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725569972069934,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822160669136792,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837689126143232,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827277816366404,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848514216719195,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912281962460838,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924635672650766,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951937775185797,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955459379707463,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996324696301599,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986840636047418,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.self_attn": [
+ {
+ "accuracy": 0.9624556459020823,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9638149088714272,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966128898318857,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731651132460684,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979888558271341,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807631041621789,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854432169813663,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865111434482969,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881467132945545,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988405313459225,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900051496224478,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905749676981941,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908825725433417,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917469431529753,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947028355381917,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959433214535238,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956959496485069,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977895863121375,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998879095823213,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.mlp": [
+ {
+ "accuracy": 0.9317085896618664,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9336435773875564,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9429558923002332,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9459052374586463,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658660412533209,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9685534046730027,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97245246428065,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821948111057281,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837224487564526,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826810664962977,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848097579088062,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912046356475912,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924372929672245,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951681817183271,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995533375418745,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962979012634605,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986440870707156,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.self_attn": [
+ {
+ "accuracy": 0.9656216644216329,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668171582743526,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9708404617849737,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9773957233410329,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818287189118564,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825881774304435,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866229476756416,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877469159546308,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876828042906709,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885670749936253,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910485846921802,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914193961885758,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918927509279456,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924896143202204,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954261311504524,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962042814731831,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963586331723491,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980747035369859,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989914739744563,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.mlp": [
+ {
+ "accuracy": 0.9323020433075726,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342701844871044,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9433167146053165,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462174526415765,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660997575847432,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687749480362982,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725526751717553,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822096759453416,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837591436225921,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828054906101897,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849168861983344,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991255592321977,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924872927949764,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951640699291602,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955584222916514,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962992245709756,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986363174612052,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.self_attn": [
+ {
+ "accuracy": 0.9923371153709013,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925224412581883,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954915028938558,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99549823399866,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995459749334259,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971324702346465,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955590646713972,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972384700377006,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975309232977452,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975821622356307,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985752828288241,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984370510574081,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985908498783829,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998461567243794,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989729408916901,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991191346489359,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989771689652116,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997115908463456,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999673805286875,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.mlp": [
+ {
+ "accuracy": 0.9811925254762173,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817058588378131,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842266714549623,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849823992699385,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905871613882482,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913147779298015,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923701590159908,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951182099757716,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955421296763234,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952246638713405,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958023339568172,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975680393690709,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979028400339303,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986676175176399,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987487139806035,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989479207215481,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999586516083582,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.self_attn": [
+ {
+ "accuracy": 0.9863162519177422,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870703057968058,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923058476706501,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921633902995382,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924568198912311,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943161772680469,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924855433346238,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943068775464781,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946824459475465,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947726883983705,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960416847025044,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970286915777251,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961076912295539,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971158314438071,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978602468036115,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983802583883516,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978632671263767,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995351644083712,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993034406106744,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.mlp": [
+ {
+ "accuracy": 0.9722781821619719,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9730187952518463,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761316905496642,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97704425919801,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861467212904245,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872288115439005,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885241388110444,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927656080690213,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933978671906516,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929643408977427,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993827001657337,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964197074586991,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969165127258748,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980382031499175,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981734739849344,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984235907904804,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994161145659746,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.self_attn": [
+ {
+ "accuracy": 0.9635075002443045,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647748144343495,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678804749855772,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975613204529509,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811677185352892,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817737329285592,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870748340035789,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877749991719611,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888431992731057,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889247854589485,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905630798893981,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912089991848916,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916505919245537,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922619820863474,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952286969055422,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961338660214096,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963223397062393,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978658634063322,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989551416583708,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.mlp": [
+ {
+ "accuracy": 0.9316645334474742,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9335778222884983,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9425450335256755,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9455189639702439,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656224818900228,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9683304415084422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721890660002828,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981844296795316,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834737943019718,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825274200411513,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846752261510119,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911125443177298,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923583892232273,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950715682061855,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954821281717159,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996249489224283,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985835553889046,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.self_attn": [
+ {
+ "accuracy": 0.9537865724414587,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9551741087343544,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9607197451405227,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698350551771,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976477846968919,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769113931106403,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844105805968866,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849680608604103,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986211190931499,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871965564088896,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881930081755854,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889599440502934,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989799489849247,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907162202289328,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940077341743745,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952460718923248,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952964097028598,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973116889304947,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988119364206796,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.mlp": [
+ {
+ "accuracy": 0.9297301913611591,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9317178723867983,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.941267877118662,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443992015440017,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9644244713708758,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9672436950495467,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9714020473184064,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810993310529739,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98275462386664,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818051372421905,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839897501515225,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906999649247155,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919934530335013,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948506485670805,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995264163473621,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960826287424425,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985817404231057,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.self_attn": [
+ {
+ "accuracy": 0.9438093651551753,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9455973405856639,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9558804116677493,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9643303193151951,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721492730313912,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9696818670490757,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780592895112932,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975161383044906,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798537619644776,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800844814162701,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868916404084302,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878041589981876,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882374077569693,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889709630515426,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932704607781488,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943467889679596,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945205381663982,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971068996092072,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984773485775804,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.mlp": [
+ {
+ "accuracy": 0.922890292480588,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.924928605556488,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9353087733034045,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9386197202838957,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9604559869039804,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.964205636177212,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687150722602382,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9781123218126595,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808429721742868,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800167196663097,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823547832202166,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989655573212076,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911466648918577,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942131606221665,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947176255809609,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99570402916288,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983965002611512,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.self_attn": [
+ {
+ "accuracy": 0.9432861453387886,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9456116510555148,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9537060365546495,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966851421049796,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733642505016178,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9735991606721655,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824671322712675,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828924887115136,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846992893726565,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985385547101032,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866445885854773,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875469116959721,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884863024926744,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894941137754358,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993721309438115,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994685380050214,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952394235297106,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971018662472488,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986625081146485,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.mlp": [
+ {
+ "accuracy": 0.9337531251367182,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9352742773480713,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9434063888620585,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9460821493994445,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660537266172469,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689954988425598,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726249100640416,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820060149068013,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838739162078127,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826479388866574,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848665873287246,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910833079484291,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924003609921783,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950582988094538,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954360906558577,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962620872247498,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985231003520312,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.self_attn": [
+ {
+ "accuracy": 0.9973379723232938,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975171713740565,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982815929397475,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984281775759882,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986830925190588,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987098783676629,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991939260071376,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992102088763204,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992937228125811,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993120388971874,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993242765995092,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993784863581823,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995262923948758,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995612997481658,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997043807688897,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997337334916665,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997336823016667,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998307124706116,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998758041801921,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.mlp": [
+ {
+ "accuracy": 0.9914991960686166,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917268468707334,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927488385292236,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930720798729453,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958052360889269,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961346215859521,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966010905336589,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978082937304862,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980109936295776,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978642705682432,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981226952077122,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988933841959806,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990413547820935,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993695393313828,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999404157664685,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994923167796514,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997564339473684,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.self_attn": [
+ {
+ "accuracy": 0.9942247387953103,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946865537785925,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962510646437295,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964844116329914,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972275005711708,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972892192599829,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998286331188865,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984212848285097,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985290603726753,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985686516301939,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986273442555103,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987223440039088,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990466599265346,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991211750748334,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994395356698078,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995100941032433,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995127596866951,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997207773012633,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998063149869267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.mlp": [
+ {
+ "accuracy": 0.9895866647129878,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898669680696912,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911745040444657,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915538832719903,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948009337531403,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952090648002923,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957429091446102,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973041417542845,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975289096473716,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997346305361134,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976660298998468,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986318852752447,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988157128464081,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992340586177306,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992737183492864,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993743035811349,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997262309279904,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.norm.norm": null,
+ "lm_head.linear": null
+ },
+ "strategy": {
+ "model.layers.0.self_attn": {
+ "accuracy": 0.9943463499366771,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.0.mlp": {
+ "accuracy": 0.9924008402740583,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.1.self_attn": {
+ "accuracy": 0.9945900982129388,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.1.mlp": {
+ "accuracy": 0.9939982455398422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.2.self_attn": {
+ "accuracy": 0.9981310617731651,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.2.mlp": {
+ "accuracy": 0.9951269059383776,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.3.self_attn": {
+ "accuracy": 0.9976795297843637,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.3.mlp": {
+ "accuracy": 0.9965353266888997,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.4.self_attn": {
+ "accuracy": 0.9978095898550237,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.4.mlp": {
+ "accuracy": 0.9953385644475929,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.5.self_attn": {
+ "accuracy": 0.997818165429635,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.5.mlp": {
+ "accuracy": 0.9965560747805284,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.6.self_attn": {
+ "accuracy": 0.997284421682707,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.6.mlp": {
+ "accuracy": 0.9954958254238591,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.7.self_attn": {
+ "accuracy": 0.9966743008699268,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.7.mlp": {
+ "accuracy": 0.994298821548,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.8.self_attn": {
+ "accuracy": 0.9962565110181458,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.8.mlp": {
+ "accuracy": 0.9933631024032366,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.9.self_attn": {
+ "accuracy": 0.9952225348097272,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.9.mlp": {
+ "accuracy": 0.996181691181846,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.10.self_attn": {
+ "accuracy": 0.9959875158820068,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.10.mlp": {
+ "accuracy": 0.9955459609918762,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.11.self_attn": {
+ "accuracy": 0.9969246424443554,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.11.mlp": {
+ "accuracy": 0.9965966620657127,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.12.self_attn": {
+ "accuracy": 0.9947681912162807,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.12.mlp": {
+ "accuracy": 0.9958211112534627,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.13.self_attn": {
+ "accuracy": 0.998476260661846,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.13.mlp": {
+ "accuracy": 0.9954700223461259,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.14.self_attn": {
+ "accuracy": 0.9980772634007735,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.14.mlp": {
+ "accuracy": 0.9953196261485573,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.15.self_attn": {
+ "accuracy": 0.9977348528045695,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.15.mlp": {
+ "accuracy": 0.995120030187536,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.16.self_attn": {
+ "accuracy": 0.995450929418439,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.16.mlp": {
+ "accuracy": 0.9950599781586789,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.17.self_attn": {
+ "accuracy": 0.9956986843317281,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.17.mlp": {
+ "accuracy": 0.9954185747192241,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.18.self_attn": {
+ "accuracy": 0.9976474752329523,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.18.mlp": {
+ "accuracy": 0.9946411842829548,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.19.self_attn": {
+ "accuracy": 0.9975838983955327,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.19.mlp": {
+ "accuracy": 0.9943471373990178,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.20.self_attn": {
+ "accuracy": 0.9972972613177262,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.20.mlp": {
+ "accuracy": 0.9942056277068332,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.21.self_attn": {
+ "accuracy": 0.9971943795535481,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.21.mlp": {
+ "accuracy": 0.9939260903047398,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.22.self_attn": {
+ "accuracy": 0.9976833624823485,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.22.mlp": {
+ "accuracy": 0.9964987884595757,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.23.self_attn": {
+ "accuracy": 0.9955371092073619,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.23.mlp": {
+ "accuracy": 0.99459478398785,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.24.self_attn": {
+ "accuracy": 0.9974927010043757,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.24.mlp": {
+ "accuracy": 0.9956658300361596,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.25.self_attn": {
+ "accuracy": 0.9970777546841418,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.25.mlp": {
+ "accuracy": 0.9951997506432235,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.26.self_attn": {
+ "accuracy": 0.9969799995742505,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.26.mlp": {
+ "accuracy": 0.9948874444817193,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.27.self_attn": {
+ "accuracy": 0.9971529373142403,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.27.mlp": {
+ "accuracy": 0.9948491686081979,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.28.self_attn": {
+ "accuracy": 0.9975688391277799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.28.mlp": {
+ "accuracy": 0.9958167897420935,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.29.self_attn": {
+ "accuracy": 0.9950134730897844,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.29.mlp": {
+ "accuracy": 0.9939938187017106,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.30.self_attn": {
+ "accuracy": 0.9951323153509293,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.30.mlp": {
+ "accuracy": 0.9951095145952422,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.31.self_attn": {
+ "accuracy": 0.9953842611867003,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.31.mlp": {
+ "accuracy": 0.9951820038841106,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.32.self_attn": {
+ "accuracy": 0.9955411965493113,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.32.mlp": {
+ "accuracy": 0.9952316170674749,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.33.self_attn": {
+ "accuracy": 0.9958305972686503,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.33.mlp": {
+ "accuracy": 0.9952473715238739,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.34.self_attn": {
+ "accuracy": 0.9966131598775974,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.34.mlp": {
+ "accuracy": 0.9957144900981802,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.35.self_attn": {
+ "accuracy": 0.9966537497966783,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.35.mlp": {
+ "accuracy": 0.9967452616401715,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.36.self_attn": {
+ "accuracy": 0.9961586743447697,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.36.mlp": {
+ "accuracy": 0.9952522270032205,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.37.self_attn": {
+ "accuracy": 0.9961402852204628,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.37.mlp": {
+ "accuracy": 0.9951937775185797,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.38.self_attn": {
+ "accuracy": 0.9959433214535238,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.38.mlp": {
+ "accuracy": 0.9951681817183271,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.39.self_attn": {
+ "accuracy": 0.9980747035369859,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.39.mlp": {
+ "accuracy": 0.9951640699291602,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.40.self_attn": {
+ "accuracy": 0.9985908498783829,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.40.mlp": {
+ "accuracy": 0.9958023339568172,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.41.self_attn": {
+ "accuracy": 0.9971158314438071,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.41.mlp": {
+ "accuracy": 0.9969165127258748,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.42.self_attn": {
+ "accuracy": 0.9978658634063322,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.42.mlp": {
+ "accuracy": 0.9950715682061855,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.43.self_attn": {
+ "accuracy": 0.9973116889304947,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.43.mlp": {
+ "accuracy": 0.9948506485670805,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.44.self_attn": {
+ "accuracy": 0.9971068996092072,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.44.mlp": {
+ "accuracy": 0.9947176255809609,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.45.self_attn": {
+ "accuracy": 0.9971018662472488,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.45.mlp": {
+ "accuracy": 0.9950582988094538,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.46.self_attn": {
+ "accuracy": 0.9982815929397475,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.46.mlp": {
+ "accuracy": 0.9961346215859521,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.47.self_attn": {
+ "accuracy": 0.9987223440039088,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.47.mlp": {
+ "accuracy": 0.9952090648002923,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ }
+ },
+ "q_last_module_idx": 98
+}
\ No newline at end of file
diff --git a/measurement.json b/measurement.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b19f995922327eadccee1c889b7441bd30ef053
--- /dev/null
+++ b/measurement.json
@@ -0,0 +1,93655 @@
+{
+ "measurement": {
+ "model.layers.0.self_attn": [
+ {
+ "accuracy": 0.902508161496371,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9203556086868048,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9246767205186188,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955721165984869,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563947499264032,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568075467832386,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.967554040485993,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687355454079807,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9720487990416586,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9737953173462301,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780201958492398,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794629843672737,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802156471414492,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822191685670987,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892206196091138,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910724487854168,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913987399486359,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943463499366771,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976803997560637,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.0.mlp": [
+ {
+ "accuracy": 0.9123474769294262,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9179784115403891,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.932591964257881,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.937203103909269,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9628385086543858,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679891671985388,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975108077051118,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814978303038515,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832107973634265,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824423746322282,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846852865885012,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910700924519915,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924008402740583,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950336539186537,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954662031377666,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970542312948965,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985220660455525,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.self_attn": [
+ {
+ "accuracy": 0.8877861187793314,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8973407302983105,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.910242407117039,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9385041804052889,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.940526916179806,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9435215112753212,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9515076652169228,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955113283591345,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963577882386744,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9648914394201711,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9705673614516854,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9749045115895569,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721663881791756,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9771681335987523,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837308657588437,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887261725962162,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859461099258624,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945900982129388,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963489300280344,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.mlp": [
+ {
+ "accuracy": 0.9527104110457003,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563737579155713,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9567327841650695,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568935022689402,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930489940161351,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939982455398422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947059626574628,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970151603920385,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970192952168873,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996223299196572,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975017743272474,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980218067430542,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983664975079591,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998927123764588,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990109881728131,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991535865574406,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993296123193431,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.self_attn": [
+ {
+ "accuracy": 0.9912483744265046,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916435057821218,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923406311427243,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994128311634995,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952435296145268,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953209938539658,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969110778911272,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970286335446872,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972990020178258,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974499639647547,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974876438791398,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976576824410586,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979693320783554,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981310617731651,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988565697785816,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990829429698351,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991081247353577,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995378834355506,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997747411516684,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.mlp": [
+ {
+ "accuracy": 0.9892762480885722,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895454781362787,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912471331772394,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991794113710057,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947041008272208,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951269059383776,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958544310502475,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972624051006278,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975140962487785,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973148557328386,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976522701763315,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998632437454944,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988308300817152,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992609197433922,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993038972388604,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994499729345989,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998017110610817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.self_attn": [
+ {
+ "accuracy": 0.9899887884967029,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904003122937866,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915672297938727,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936683645646553,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944629863894079,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945710314495955,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955734857358038,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956779086787719,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961270387721015,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962668074440444,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971315091243014,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974506823491538,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997335236883373,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976795297843637,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985245664138347,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988046068392578,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987181324831909,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995058817348763,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996684330690186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.mlp": [
+ {
+ "accuracy": 0.9844154579914175,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847926673828624,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871780377579853,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879374770680442,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921912606514525,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928218296554405,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938397765217815,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959715837030672,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963449053175282,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960351230402011,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965353266888997,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997980712352728,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982754946904606,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989119462479721,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989720682242478,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991756232702755,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997108341558487,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.self_attn": [
+ {
+ "accuracy": 0.9940207607578486,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951273926417343,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978095898550237,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977955239373841,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976957584294723,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979161136798211,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977876108750934,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997963167646958,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997377075618715,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973086231620982,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998610572751204,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987351718518767,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986403053189861,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987657150850282,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992435346975981,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994336164018023,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992440667119808,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998596248324247,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998452412955885,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.mlp": [
+ {
+ "accuracy": 0.9953385644475929,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954606430546846,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961155957134906,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963309457525611,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976867814257275,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978648535179673,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981546685303329,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988047819279018,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989133847047924,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988297178279026,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989727466163458,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994054905910161,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994889207046072,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996792593592545,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996966957805853,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997519157095667,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9999101926136973,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.self_attn": [
+ {
+ "accuracy": 0.983027494745329,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988349880441092,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933699457033072,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933736611856148,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932925261673518,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959597528359154,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934077207872178,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996139597526053,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955893133592326,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955878545151791,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972478003473952,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997818165429635,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973545127431862,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977891548696789,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987297143379692,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999152827916987,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987308212730568,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997582192772825,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996598286616063,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.mlp": [
+ {
+ "accuracy": 0.9925510261964519,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992728061741218,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993811263149837,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941331359441392,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962669986125547,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965560747805284,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970157300849678,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980933387705591,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982620093869627,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981101373850834,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983399104385171,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990414018138836,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991743572754785,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994895925538003,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995117338021373,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995983660337515,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998637763601437,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.self_attn": [
+ {
+ "accuracy": 0.9853996349847876,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894411026616581,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904139981372282,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927006095531397,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937793352000881,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939195910119452,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951347391761374,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953175328264479,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995725197615684,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959138984268066,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967400987807196,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969654745509615,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969755911588436,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997284421682707,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982928356475895,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986039540162892,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985298825704376,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994038183504017,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996078892836522,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.mlp": [
+ {
+ "accuracy": 0.9796033757738769,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801610637223348,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834316545748152,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844096631277353,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898302512592636,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906708221533336,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99199359229533,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947536016406957,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995231583743589,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948356513341423,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954958254238591,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973706452437909,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977587403773214,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985758317998261,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986605149169918,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989306384450174,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996214626262372,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.self_attn": [
+ {
+ "accuracy": 0.9842456089681946,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847515997826122,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862019201391377,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989355675119441,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918334566464182,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919354065787047,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939575470634736,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941131755767856,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950063232099637,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995209871471161,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958428621175699,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962687853985699,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962730693951016,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966743008699268,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979651567628025,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982993875746615,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998358772485517,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992254695025622,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995699401733873,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.mlp": [
+ {
+ "accuracy": 0.974171947222203,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748882604762912,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789550169371068,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801530737895519,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871387034072541,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881842365139164,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898216370493174,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933750616037287,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939624306280166,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934718561416958,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994298821548,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966810355253983,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99716517124034,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982046157965669,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983098571392475,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998642562663008,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995165277887281,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.self_attn": [
+ {
+ "accuracy": 0.9817969363648444,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826170109445229,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842383282957599,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987987891305238,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902089073439129,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905349539476447,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925528938765638,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929857852112036,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941390909953043,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943753193656448,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953846742282622,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957563256757567,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958568579750136,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962565110181458,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977382431097794,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981046210305067,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982187150235404,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990283438819461,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995301622984698,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.mlp": [
+ {
+ "accuracy": 0.9699697830947116,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9707644692389295,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758064048364758,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772977469256148,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850295406067744,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862511573592201,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882942370604724,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922574244847056,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929685922397766,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924017082084902,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933631024032366,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961383405752713,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966958504082868,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979089612534153,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980363582653808,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984643486241112,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994418839887658,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.self_attn": [
+ {
+ "accuracy": 0.9762045053066686,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977105101919733,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792947630630806,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844983145594597,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880177224404179,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882629308849573,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916761830099858,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919323883077595,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927965526585467,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931856998300646,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940749995876104,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945208916324191,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947839853120968,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952225348097272,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970656104414957,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975598896999145,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978074637183454,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986692166348803,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994058420306828,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.mlp": [
+ {
+ "accuracy": 0.9653462056303397,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966274723643437,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9718257325002924,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734515016898513,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827394402818754,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841620186925866,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863689955091104,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910840421216562,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918851483380422,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912260650889948,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923439234553371,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955300039000576,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996181691181846,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975773043261142,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977210198558168,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981782444083365,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993462364582228,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.self_attn": [
+ {
+ "accuracy": 0.9896986646344885,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898966330802068,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959875158820068,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959419664373854,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958830689574825,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994527042581467,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958289333299035,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945835779071786,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961835095891729,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961431270785397,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976346265902976,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984772361494834,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976802616729401,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998478227716987,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987404754647287,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989050509684603,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987427304295124,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999758682492029,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996692690583586,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.mlp": [
+ {
+ "accuracy": 0.990279221732635,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905298211961053,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920168473036028,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924921841884498,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951802256982774,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955459609918762,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961894396110438,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975046989129623,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977252853277605,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975598426244687,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978572657200857,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987597939252737,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989334050405887,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993266929595848,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993670692056185,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994898942059081,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998092828209337,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.self_attn": [
+ {
+ "accuracy": 0.9863239590195008,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868265291443095,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921606028510723,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919705191277899,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919389690621756,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992855364602292,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919611498771701,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929512560192961,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936951973941177,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936885720526334,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966282595996745,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969246424443554,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966480978473555,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969086252094712,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981692790461238,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984889770348673,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981699131167261,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995570989412954,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999439229904965,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.mlp": [
+ {
+ "accuracy": 0.9847793944063596,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985123383696191,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875411554821767,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882662810268812,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923551889369264,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929447612666991,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939775222155731,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996076546041877,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964278059342178,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961227821040666,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965966620657127,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980314106360311,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983075839991216,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989473513051053,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989980212376395,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991948430506454,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999716363789048,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.self_attn": [
+ {
+ "accuracy": 0.9758125139633194,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766555116511881,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786176779307425,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845216747489758,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873013857286423,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874568734667264,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911378039978445,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912875468144193,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920827563619241,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928605046588928,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936999409110285,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940654404636007,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942948371754028,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947681912162807,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968999695556704,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974256125715328,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977206327166641,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986770139730652,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993911658057186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.mlp": [
+ {
+ "accuracy": 0.9619943019933999,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630233785137534,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9692408089758828,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9710238851839676,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810652892338112,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826576914638281,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851201827987097,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902692650794052,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911376674135681,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990367868449539,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916139839915559,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950910237093922,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958211112534627,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973492388962768,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974947760347277,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979947993706446,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992910219734767,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.self_attn": [
+ {
+ "accuracy": 0.9732593579683453,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9740604794351384,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758571569109336,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981850401032716,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858934246003628,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859436851111241,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901865926804021,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901829248992726,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916712933336385,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991981672210386,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932556867133826,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936436033167411,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937982890987769,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943627052416559,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966125944047235,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971977410459658,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975617044838145,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998476260661846,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993510511812929,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.mlp": [
+ {
+ "accuracy": 0.9590617874637246,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9601758192293346,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9669503723271191,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689558737445623,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794857824454084,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812142355367541,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839909761212766,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894312581745908,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903528640279546,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895940163987689,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909067719127052,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946883149386849,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954700223461259,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997104218302411,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972932919627056,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997851840693329,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992196869279724,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.self_attn": [
+ {
+ "accuracy": 0.9674572005169466,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686749550746754,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9713187958113849,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791675666347146,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834151559043676,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835956042516045,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892363072722219,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894226833130233,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904058027314022,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911344906722661,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915743695746642,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922297771845479,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926136705034878,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931706467177719,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959881600225344,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965825928520644,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972190943808528,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980772634007735,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992562690567866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.mlp": [
+ {
+ "accuracy": 0.9578170392196625,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9590193158946931,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663224390242249,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9685161245288327,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788233999861404,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806237743468955,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836092637851834,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889325238764286,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900051793083549,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892134199035354,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906160493264906,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944984828180168,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953196261485573,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969970610691234,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997200449812226,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978280807699775,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991946576228656,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.self_attn": [
+ {
+ "accuracy": 0.9639496663585305,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9654018925502896,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679478466277942,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765551248565316,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810993222054094,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812793986639008,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867310639237985,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869522373774089,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879012388410047,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883594005950727,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903323815669864,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910267862142064,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912635658401996,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921826082281768,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951284613925964,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960675196198281,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962311515701003,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977348528045695,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990211970216478,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.mlp": [
+ {
+ "accuracy": 0.9561710001435131,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9574132142588496,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9650822830153629,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9674224474001676,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779465935425833,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798049030359834,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829885881626979,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884562431252562,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895629284437746,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887702904525213,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902152858558111,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942774242081214,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995120030187536,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968639670987613,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970916908496292,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977569706679787,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991551881867053,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.self_attn": [
+ {
+ "accuracy": 0.9918902807403356,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919913714402355,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941640416509472,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994114655040903,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943733899563085,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995450929418439,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945171528088395,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954713895567693,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960881019214867,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961418636376038,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970261527196271,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997912951730541,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969646203535376,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978629015240585,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983468192367582,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987805694254348,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983534378916374,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996761174661515,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995716315679601,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.mlp": [
+ {
+ "accuracy": 0.9872070293640718,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875441331532784,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897534946794622,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904376343474723,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936092627176549,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941051893692929,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950599781586789,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966741525713587,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969771623436827,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967607702419627,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971554788498906,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983539172244491,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985822996895877,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999102601341292,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999159805731324,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993414144846611,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997426053332674,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.self_attn": [
+ {
+ "accuracy": 0.979622381972149,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979827641043812,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915289613127243,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915508964331821,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917960677703377,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99220766252256,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919130146445241,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920798306411598,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899292784975842,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899077572044916,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952546581625938,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955962051753886,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952810344693717,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956986843317281,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968346282839775,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975842618150637,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968424138351111,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993985197106667,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992879528981575,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.mlp": [
+ {
+ "accuracy": 0.9795986886601895,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800954010570422,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836475889314897,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847203819663264,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897230201167986,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905139947659336,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920523978071287,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946800437464844,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951683254330419,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947878780949395,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954185747192241,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973539462371264,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977197637344943,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985762029973557,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998654469876783,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989491958876897,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996122916345485,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.self_attn": [
+ {
+ "accuracy": 0.9591868193820119,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9606455501634628,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963689126772806,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9717553314985707,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779619486071169,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9790570001350716,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839622974977829,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852984003955498,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869856234290637,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874973931582645,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891311423270963,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899767622118816,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899715750943869,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910780027857982,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943656724644825,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953498630784452,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956311181304045,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976474752329523,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988193828030489,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.mlp": [
+ {
+ "accuracy": 0.9518720533233136,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9533123800065368,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9613713058643043,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9637906108982861,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758038044674322,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977887489949353,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812068799510598,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873612260562368,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885673976968974,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876663720351644,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892762696254067,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937061189557426,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946411842829548,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965408335992834,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967898455652175,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974644881876884,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999061718754092,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.self_attn": [
+ {
+ "accuracy": 0.9581109315622598,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9596416996791959,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630198783706874,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733511302620173,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9787798321340233,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791236850433052,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867767564137466,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872424571658485,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881223333068192,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891258522984572,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893495296710171,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899621973163448,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904887008597143,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912755576951895,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994794098805869,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956135836691828,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996461629998521,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975838983955327,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990680614864687,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.mlp": [
+ {
+ "accuracy": 0.949402768863365,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9508629126939923,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9593443237245083,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619821181986481,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744504160480574,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766377885825932,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980175971868448,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865899439901114,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878867646912113,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869770252262242,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886702921357937,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933583022502717,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943471373990178,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996348384549492,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966181806812529,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973575899493881,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999004362798587,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.self_attn": [
+ {
+ "accuracy": 0.9531727249268442,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9553546705283225,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9591875285841525,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703965260414407,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9756896772887558,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9762924946844578,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842725213966332,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850752123165876,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985899701656308,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863111876766197,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987529011850711,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885381097556092,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890030458336696,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899866348132491,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938212371780537,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994938170624664,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954652949527372,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972972613177262,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988004061815445,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.mlp": [
+ {
+ "accuracy": 0.9480401042383164,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.949517953209579,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9579005774576217,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9605092275887728,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9739315117476508,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761737691005692,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796711904928088,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861559904529713,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875617006327957,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866939535131678,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884115278837271,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931988872704096,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942056277068332,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962179714348167,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965195794648025,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972543855546974,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988709868557635,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.self_attn": [
+ {
+ "accuracy": 0.9528840403072536,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9542537578381598,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9580731589812785,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668758142506704,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9747469594003633,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754544387105852,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98189686704427,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828780224779621,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843287441181019,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857511474983767,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870679471641779,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881277907988988,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880640183691867,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891430772840977,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934388622350525,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947039625258185,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948006026097573,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971943795535481,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985773553344188,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.mlp": [
+ {
+ "accuracy": 0.9452786394394934,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9468083463143557,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9557567997835577,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9585219516884536,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725894997827709,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748451801715419,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785158903105184,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856567675014958,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869674782385118,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860588647425175,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878197305952199,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928987653984223,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939260903047398,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960985295183491,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963896564295283,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971529023896437,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989376296944101,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.self_attn": [
+ {
+ "accuracy": 0.9910135175450705,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912824938655831,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941338914213702,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941643851052504,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945628636050969,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953863266273402,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948620661452878,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955346850911155,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961977519269567,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962104187288787,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976131010480458,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973233057680773,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976833624823485,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997326233657077,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980814326991094,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988377549670986,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980935500643682,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996226447292429,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994861224859051,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.mlp": [
+ {
+ "accuracy": 0.9842658781562932,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846874004579149,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872097237966955,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880227182293311,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921422847255599,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927411952521652,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993845232820604,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959024451673031,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962649137887638,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960189153935062,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964987884595757,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997976030492282,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982532844805974,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988919206953142,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989660839673888,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991774850786896,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996749793717754,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.self_attn": [
+ {
+ "accuracy": 0.9837653411086649,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840333891916089,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904419900849462,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904393116594292,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907222538604401,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885966366273351,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906790258246474,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888943562982604,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991739244927885,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916874898481183,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955156600044575,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954305627325084,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955371092073619,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954360554984305,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968435579503421,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997513074951712,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968456693313783,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993884474752122,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992451430443907,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.mlp": [
+ {
+ "accuracy": 0.9759478892665356,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765194484498352,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803963751764968,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815706800436601,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878923632204533,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888117971713655,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904785320395604,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937392670253757,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994295743497787,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938581893220544,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99459478398785,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996881335915532,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973090803541709,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983202805378824,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984141130771604,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987336630001664,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995366168823239,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.self_attn": [
+ {
+ "accuracy": 0.9556966400705278,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9570334849413484,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.960812549572438,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703220267547294,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772858648793772,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778754758881405,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854066136176698,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862237990018912,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869342013844289,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879776879679412,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887068463722244,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893193228635937,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898968231282197,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907162148156203,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942090424301568,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953937472309917,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957477512361947,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974927010043757,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988711644837167,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.mlp": [
+ {
+ "accuracy": 0.939760722219944,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9414845332503319,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9511478268541396,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.954194939462468,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698484404943883,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972340663196519,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9763197798747569,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841194238979369,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855774453608319,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846697235479951,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866198089439422,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921901000780053,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933257188240532,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956658300361596,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960375506052515,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968587130861124,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998800835244765,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.self_attn": [
+ {
+ "accuracy": 0.9503053929656744,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9525580331683159,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9565810647327453,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9682650562608615,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738911159802228,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9746871701208875,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981898903963156,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829692101338878,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843669487163424,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848410444683395,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867640393786132,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879159886040725,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881031874101609,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892810968449339,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934559455141425,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945060422469396,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950925561715849,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970777546841418,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986730054151849,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.mlp": [
+ {
+ "accuracy": 0.9339546884875745,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9359053233638406,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9467246429994702,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9503237595781684,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668553009396419,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9696119821164757,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9741377918981016,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823939295019954,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840543393511325,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831261212239042,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985276190796867,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913982859579846,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926482754817698,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951997506432235,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956310951092746,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966006540635135,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986620795898489,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.self_attn": [
+ {
+ "accuracy": 0.950924233533442,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9526217842940241,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9560005511157215,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9651925785001367,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.974756367970258,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754410153254867,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821089534088969,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829862234182656,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833339191973209,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847710862522945,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871181106427684,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879768814425915,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882259144214913,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893726909067482,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922168490593322,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943546110007446,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933991891448386,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969799995742505,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998169852187857,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.mlp": [
+ {
+ "accuracy": 0.930072165094316,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9321989654563367,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9437624660786241,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9476719659287483,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649747095536441,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678829013137147,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972721746424213,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812747784890234,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830624930327758,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821682657347992,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844429356744513,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909043678781018,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992228524555685,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948874444817193,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953900648106355,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996433951149811,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998560123873176,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.self_attn": [
+ {
+ "accuracy": 0.9506571341771632,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.95194994029589,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9554108332376927,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965399626409635,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973783713998273,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9746870229719207,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815001173410565,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827296736184508,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836740818573162,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849110426730476,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869808834628202,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881130746216513,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988304317521397,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895301525830291,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934029994474258,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946216194075532,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949501673690975,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971529373142403,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986130048564519,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.mlp": [
+ {
+ "accuracy": 0.9300565491430461,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9322172533720732,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9436244373209774,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9475068859755993,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649612298235297,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679478236939758,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9727855251403525,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812858692603186,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830675406847149,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821449116570875,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844449243973941,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908643537783064,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922332215355709,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948491686081979,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995345929695759,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963671152072493,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985312398639508,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.self_attn": [
+ {
+ "accuracy": 0.9852219514432363,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854540597880259,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992581088328734,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925188558117952,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992044290585909,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908394656376913,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921488391701132,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909024515654892,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931627179321367,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933414816041477,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962164734170074,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975688391277799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962678278825479,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975411284103757,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981369049564819,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986740430613281,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981451820058282,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995850489922304,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994992236806866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.mlp": [
+ {
+ "accuracy": 0.9810774835059419,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816042414167896,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984690910903737,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857331512612291,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99059497367125,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913256106083281,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926997422298882,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950385903939605,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954885762417689,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952267833868973,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958167897420935,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975691078143427,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979071016423404,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986456794285914,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987542949966155,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990193766789162,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995884474592458,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.self_attn": [
+ {
+ "accuracy": 0.9801707121077925,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803194139385596,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864167625200935,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861038540257141,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867335963645019,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873976144008338,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986821249302011,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876039975206368,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904251811094582,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903880580095574,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943270806106739,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950134730897844,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943478996865451,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949842338974122,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966566005459754,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976610257581342,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966578336025123,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993154952098848,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991263385672937,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.mlp": [
+ {
+ "accuracy": 0.9730628487886861,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973774635582231,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779872408835217,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792550611309707,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865127976518124,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875629763700999,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893352640210651,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929699986823834,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993587943376042,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931577415263746,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939938187017106,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965159249695716,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970039997715503,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980992393029737,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982241835823515,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985617383063072,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99946317839931,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.self_attn": [
+ {
+ "accuracy": 0.960547131486237,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619710550177842,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9641266402322799,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726306177908555,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774892054265365,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978482106118463,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831622139317915,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843430254259147,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861415453488007,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869873017305508,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988334433583077,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893150111893192,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892438768292777,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901899283868261,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938217146846,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951323153509293,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948282100085635,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976286789024016,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985810204962036,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.mlp": [
+ {
+ "accuracy": 0.9314752910286188,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9334497996605933,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9442711595911533,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9477534026373178,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658016683533788,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686134352814406,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731169363949448,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820591834140942,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836634104140103,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826249615289271,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984836339019239,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911455910187215,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924397784052417,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951095145952422,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954999905312434,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996416717855027,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986326706202817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.self_attn": [
+ {
+ "accuracy": 0.9617549669928849,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9633937487378716,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656938512343913,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973594733630307,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796587142627686,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803864565910771,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848732578684576,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858064573490992,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872101178625599,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877110663801432,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895211174734868,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901978281559423,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903222138527781,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910594234825112,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943284161563497,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953842611867003,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952440769411623,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976959823688958,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986929383812821,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.mlp": [
+ {
+ "accuracy": 0.9317440665327013,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9337658795993775,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9442870973143727,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9475885890424252,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9661285383626819,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.968816627166234,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731064232764766,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823057807516307,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838297074311413,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828014142112806,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849381670355797,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912480839411728,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925001342198811,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951820038841106,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955530187871773,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964022201456828,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986571829067543,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.self_attn": [
+ {
+ "accuracy": 0.9678770737955347,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691678545204923,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9709918311564252,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765152925392613,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807526028016582,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820293172961101,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841885701171122,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855642172624357,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866925096139312,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870485090650618,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897773859556764,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902223890530877,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902153389411978,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909679422853515,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942741809645668,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955411965493113,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948284650454298,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980486317217583,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986889937572414,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.mlp": [
+ {
+ "accuracy": 0.932304578833282,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342344424221665,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9444406803231686,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.947586020687595,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662447790615261,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689184916205704,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731233624042943,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98242219875101,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839446639525704,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828673212323338,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849894531071186,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912908738479018,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925277467991691,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952316170674749,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955659814586397,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964031476119999,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986658465059008,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.self_attn": [
+ {
+ "accuracy": 0.9655442168004811,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9671692294068635,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9697359376586974,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761799619300291,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803578312275931,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814343429170549,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843596651917323,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858793318271637,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869456980959512,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874889963539317,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896778290858492,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908777676173486,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990540920291096,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991681206272915,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948772302595899,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958305972686503,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956192086392548,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997941504814662,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988134019586141,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.mlp": [
+ {
+ "accuracy": 0.9325818351935595,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9344900948926806,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443744430318475,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474421949125826,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663861433509737,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9690310031874105,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731040432816371,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824838677886873,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839952855254523,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829381878953427,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850510018295608,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913280457840301,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925561861309689,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952473715238739,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995588112360565,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963884983881144,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986808212925098,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.self_attn": [
+ {
+ "accuracy": 0.9875456623267382,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879995033843443,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943620861449745,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943356119911186,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945071052061394,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966131598775974,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945579385675956,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965798364137299,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963930066151079,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963805679144571,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978544423211133,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978882907453226,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979100275522796,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979497697204351,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985688193410169,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992296196833195,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985732258646749,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997000065504835,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999592912312437,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.mlp": [
+ {
+ "accuracy": 0.9806502945721149,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981183686060831,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839521977119148,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848100668168627,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903757648426108,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911127613158897,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922769031254575,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950082659197506,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954419361311011,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951236607448664,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957144900981802,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975175310974009,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978603233030299,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986405047020526,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987238197063562,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989435812094598,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995850031682494,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.self_attn": [
+ {
+ "accuracy": 0.9871479421271943,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880682986695319,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990131174272392,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898401049431413,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900419003679417,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925147893663961,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990109168633353,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992998635803815,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936714109790046,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937381937343162,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962826371920528,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965584015008062,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964808065269608,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966537497966783,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974247746868059,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984650467449683,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99742937242263,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999517957230637,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992533045333403,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.mlp": [
+ {
+ "accuracy": 0.9704249275382608,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9712161585921422,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754329854622483,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766340904170647,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985306327464059,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864371244912036,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881206953432411,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924467977543827,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930931831186172,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925579508999363,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934620868298225,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962237248546444,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967452616401715,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979627527136472,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998072977818083,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983912546158535,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994254562079732,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.self_attn": [
+ {
+ "accuracy": 0.965649431804195,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668996206019074,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9694499158067629,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9755590457934886,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807646207045764,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981554796686396,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854566292488016,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863431870471686,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881037988234311,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988736093393527,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904858563677408,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914403361617588,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915653750649653,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924774839309976,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99530891442555,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961586743447697,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996117347778636,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978859801776707,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989366781519493,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.mlp": [
+ {
+ "accuracy": 0.9328135452233255,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9347493099048734,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9444518794771284,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474378717131913,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966479453491047,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691691853804514,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731421418255195,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982578118913807,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840653012506664,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829857222503051,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851169427274726,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913530095946044,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925886914716102,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952522270032205,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956026887812186,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963736499339575,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986830660418491,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.self_attn": [
+ {
+ "accuracy": 0.9632973142433912,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647458797553554,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9677944149589166,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744596001692116,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9805283090099692,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815037929220125,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855152769596316,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868621388450265,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877731337910518,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881437398144044,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905192241421901,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911876061523799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915461755299475,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924024375504814,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948597592592705,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961402852204628,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957100535684731,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979259906467632,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988766744427267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.mlp": [
+ {
+ "accuracy": 0.9317238796502352,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9336495161987841,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9432325733359903,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462370390538126,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965948719996959,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686132016358897,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725569972069934,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822160669136792,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837689126143232,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827277816366404,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848514216719195,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912281962460838,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924635672650766,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951937775185797,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955459379707463,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996324696301599,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986840636047418,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.self_attn": [
+ {
+ "accuracy": 0.9624556459020823,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9638149088714272,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966128898318857,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731651132460684,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979888558271341,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807631041621789,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854432169813663,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865111434482969,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881467132945545,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988405313459225,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900051496224478,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905749676981941,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908825725433417,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917469431529753,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947028355381917,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959433214535238,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956959496485069,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977895863121375,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998879095823213,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.mlp": [
+ {
+ "accuracy": 0.9317085896618664,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9336435773875564,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9429558923002332,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9459052374586463,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658660412533209,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9685534046730027,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97245246428065,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821948111057281,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837224487564526,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826810664962977,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848097579088062,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912046356475912,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924372929672245,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951681817183271,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995533375418745,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962979012634605,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986440870707156,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.self_attn": [
+ {
+ "accuracy": 0.9656216644216329,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668171582743526,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9708404617849737,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9773957233410329,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818287189118564,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825881774304435,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866229476756416,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877469159546308,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876828042906709,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885670749936253,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910485846921802,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914193961885758,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918927509279456,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924896143202204,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954261311504524,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962042814731831,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963586331723491,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980747035369859,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989914739744563,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.mlp": [
+ {
+ "accuracy": 0.9323020433075726,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342701844871044,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9433167146053165,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462174526415765,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660997575847432,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687749480362982,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725526751717553,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822096759453416,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837591436225921,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828054906101897,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849168861983344,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991255592321977,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924872927949764,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951640699291602,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955584222916514,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962992245709756,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986363174612052,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.self_attn": [
+ {
+ "accuracy": 0.9923371153709013,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925224412581883,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954915028938558,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99549823399866,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995459749334259,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971324702346465,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955590646713972,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972384700377006,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975309232977452,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975821622356307,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985752828288241,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984370510574081,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985908498783829,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998461567243794,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989729408916901,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991191346489359,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989771689652116,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997115908463456,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999673805286875,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.mlp": [
+ {
+ "accuracy": 0.9811925254762173,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817058588378131,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842266714549623,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849823992699385,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905871613882482,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913147779298015,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923701590159908,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951182099757716,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955421296763234,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952246638713405,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958023339568172,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975680393690709,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979028400339303,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986676175176399,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987487139806035,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989479207215481,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999586516083582,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.self_attn": [
+ {
+ "accuracy": 0.9863162519177422,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870703057968058,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923058476706501,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921633902995382,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924568198912311,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943161772680469,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924855433346238,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943068775464781,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946824459475465,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947726883983705,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960416847025044,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970286915777251,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961076912295539,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971158314438071,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978602468036115,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983802583883516,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978632671263767,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995351644083712,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993034406106744,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.mlp": [
+ {
+ "accuracy": 0.9722781821619719,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9730187952518463,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761316905496642,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97704425919801,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861467212904245,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872288115439005,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885241388110444,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927656080690213,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933978671906516,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929643408977427,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993827001657337,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964197074586991,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969165127258748,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980382031499175,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981734739849344,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984235907904804,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994161145659746,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.self_attn": [
+ {
+ "accuracy": 0.9635075002443045,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647748144343495,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678804749855772,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975613204529509,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811677185352892,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817737329285592,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870748340035789,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877749991719611,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888431992731057,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889247854589485,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905630798893981,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912089991848916,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916505919245537,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922619820863474,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952286969055422,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961338660214096,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963223397062393,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978658634063322,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989551416583708,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.mlp": [
+ {
+ "accuracy": 0.9316645334474742,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9335778222884983,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9425450335256755,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9455189639702439,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656224818900228,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9683304415084422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721890660002828,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981844296795316,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834737943019718,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825274200411513,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846752261510119,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911125443177298,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923583892232273,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950715682061855,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954821281717159,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996249489224283,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985835553889046,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.self_attn": [
+ {
+ "accuracy": 0.9537865724414587,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9551741087343544,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9607197451405227,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698350551771,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976477846968919,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769113931106403,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844105805968866,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849680608604103,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986211190931499,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871965564088896,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881930081755854,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889599440502934,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989799489849247,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907162202289328,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940077341743745,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952460718923248,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952964097028598,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973116889304947,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988119364206796,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.mlp": [
+ {
+ "accuracy": 0.9297301913611591,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9317178723867983,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.941267877118662,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443992015440017,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9644244713708758,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9672436950495467,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9714020473184064,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810993310529739,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98275462386664,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818051372421905,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839897501515225,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906999649247155,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919934530335013,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948506485670805,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995264163473621,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960826287424425,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985817404231057,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.self_attn": [
+ {
+ "accuracy": 0.9438093651551753,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9455973405856639,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9558804116677493,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9643303193151951,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721492730313912,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9696818670490757,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780592895112932,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975161383044906,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798537619644776,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800844814162701,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868916404084302,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878041589981876,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882374077569693,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889709630515426,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932704607781488,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943467889679596,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945205381663982,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971068996092072,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984773485775804,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.mlp": [
+ {
+ "accuracy": 0.922890292480588,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.924928605556488,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9353087733034045,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9386197202838957,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9604559869039804,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.964205636177212,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687150722602382,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9781123218126595,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808429721742868,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800167196663097,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823547832202166,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989655573212076,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911466648918577,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942131606221665,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947176255809609,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99570402916288,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983965002611512,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.self_attn": [
+ {
+ "accuracy": 0.9432861453387886,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9456116510555148,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9537060365546495,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966851421049796,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733642505016178,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9735991606721655,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824671322712675,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828924887115136,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846992893726565,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985385547101032,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866445885854773,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875469116959721,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884863024926744,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894941137754358,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993721309438115,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994685380050214,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952394235297106,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971018662472488,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986625081146485,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.mlp": [
+ {
+ "accuracy": 0.9337531251367182,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9352742773480713,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9434063888620585,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9460821493994445,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660537266172469,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689954988425598,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726249100640416,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820060149068013,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838739162078127,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826479388866574,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848665873287246,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910833079484291,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924003609921783,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950582988094538,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954360906558577,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962620872247498,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985231003520312,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.self_attn": [
+ {
+ "accuracy": 0.9973379723232938,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975171713740565,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982815929397475,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984281775759882,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986830925190588,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987098783676629,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991939260071376,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992102088763204,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992937228125811,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993120388971874,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993242765995092,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993784863581823,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995262923948758,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995612997481658,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997043807688897,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997337334916665,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997336823016667,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998307124706116,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998758041801921,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.mlp": [
+ {
+ "accuracy": 0.9914991960686166,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917268468707334,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927488385292236,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930720798729453,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958052360889269,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961346215859521,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966010905336589,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978082937304862,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980109936295776,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978642705682432,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981226952077122,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988933841959806,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990413547820935,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993695393313828,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999404157664685,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994923167796514,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997564339473684,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.self_attn": [
+ {
+ "accuracy": 0.9942247387953103,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946865537785925,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962510646437295,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964844116329914,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972275005711708,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972892192599829,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998286331188865,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984212848285097,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985290603726753,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985686516301939,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986273442555103,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987223440039088,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990466599265346,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991211750748334,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994395356698078,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995100941032433,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995127596866951,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997207773012633,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998063149869267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.mlp": [
+ {
+ "accuracy": 0.9895866647129878,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898669680696912,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911745040444657,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915538832719903,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948009337531403,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952090648002923,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957429091446102,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973041417542845,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975289096473716,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997346305361134,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976660298998468,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986318852752447,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988157128464081,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992340586177306,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992737183492864,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993743035811349,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997262309279904,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.norm.norm": null,
+ "lm_head.linear": null
+ },
+ "last_module_idx": 98
+}
\ No newline at end of file
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd50792103e113781267be8add7e594d9a147499
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,442 @@
+{
+ "metadata": {
+ "total_size": 24952840192
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00006-of-00006.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.norm.weight": "model-00005-of-00006.safetensors"
+ }
+}
diff --git a/mtbench-comparison.png b/mtbench-comparison.png
new file mode 100644
index 0000000000000000000000000000000000000000..87fb2c26715940a222e99a74c5562402dca6d40f
Binary files /dev/null and b/mtbench-comparison.png differ
diff --git a/needle-in-a-haystack.txt b/needle-in-a-haystack.txt
new file mode 100644
index 0000000000000000000000000000000000000000..43b6cd4182b1d1ffafd0cae6ea2a51af9579becf
--- /dev/null
+++ b/needle-in-a-haystack.txt
@@ -0,0 +1,898 @@
+what is the random number?
+```
+May 2006 (This essay is derived from a keynote at Xtech.) Could you reproduce Silicon Valley elsewhere, or is there something
+unique about it?It wouldn't be surprising if it were hard to reproduce in other
+countries, because you couldn't reproduce it in most of the US
+either. What does it take to make a silicon valley even here? What it takes is the right people. If you could get the right ten
+thousand people to move from Silicon Valley to Buffalo, Buffalo
+would become Silicon Valley.
+[1]That's a striking departure from the past. Up till a couple decades
+ago, geography was destiny for cities. All great cities were located
+on waterways, because cities made money by trade, and water was the
+only economical way to ship.Now you could make a great city anywhere, if you could get the right
+people to move there. So the question of how to make a silicon
+valley becomes: who are the right people, and how do you get them
+to move?Two TypesI think you only need two kinds of people to create a technology
+hub: rich people and nerds. They're the limiting reagents in the
+reaction that produces startups, because they're the only ones
+present when startups get started. Everyone else will move.Observation bears this out: within the US, towns have become startup
+hubs if and only if they have both rich people and nerds. Few
+startups happen in Miami, for example, because although it's full
+of rich people, it has few nerds. It's not the kind of place nerds
+like.Whereas Pittsburgh has the opposite problem: plenty of nerds, but
+no rich people. The top US Computer Science departments are said
+to be MIT, Stanford, Berkeley, and Carnegie-Mellon. MIT yielded
+Route 128. Stanford and Berkeley yielded Silicon Valley. But
+Carnegie-Mellon? The record skips at that point. Lower down the
+list, the University of Washington yielded a high-tech community
+in Seattle, and the University of Texas at Austin yielded one in
+Austin. But what happened in Pittsburgh? And in Ithaca, home of
+Cornell, which is also high on the list?I grew up in Pittsburgh and went to college at Cornell, so I can
+answer for both. The weather is terrible, particularly in winter,
+and there's no interesting old city to make up for it, as there is
+in Boston. Rich people don't want to live in Pittsburgh or Ithaca.
+So while there are plenty of hackers who could start startups,
+there's no one to invest in them.Not BureaucratsDo you really need the rich people? Wouldn't it work to have the
+government invest in the nerds? No, it would not. Startup investors
+are a distinct type of rich people. They tend to have a lot of
+experience themselves in the technology business. This (a) helps
+them pick the right startups, and (b) means they can supply advice
+and connections as well as money. And the fact that they have a
+personal stake in the outcome makes them really pay attention.Bureaucrats by their nature are the exact opposite sort of people
+from startup investors. The idea of them making startup investments
+is comic. It would be like mathematicians running Vogue-- or
+perhaps more accurately, Vogue editors running a math journal.
+[2]Though indeed, most things bureaucrats do, they do badly. We just
+don't notice usually, because they only have to compete against
+other bureaucrats. But as startup investors they'd have to compete
+against pros with a great deal more experience and motivation.Even corporations that have in-house VC groups generally forbid
+them to make their own investment decisions. Most are only allowed
+to invest in deals where some reputable private VC firm is willing
+to act as lead investor.Not BuildingsIf you go to see Silicon Valley, what you'll see are buildings.
+But it's the people that make it Silicon Valley, not the buildings.
+I read occasionally about attempts to set up "technology
+parks" in other places, as if the active ingredient of Silicon
+Valley were the office space. An article about Sophia Antipolis
+bragged that companies there included Cisco, Compaq, IBM, NCR, and
+Nortel. Don't the French realize these aren't startups?Building office buildings for technology companies won't get you a
+silicon valley, because the key stage in the life of a startup
+happens before they want that kind of space. The key stage is when
+they're three guys operating out of an apartment. Wherever the
+startup is when it gets funded, it will stay. The defining quality
+of Silicon Valley is not that Intel or Apple or Google have offices
+there, but that they were started there.So if you want to reproduce Silicon Valley, what you need to reproduce
+is those two or three founders sitting around a kitchen table
+deciding to start a company. And to reproduce that you need those
+people.UniversitiesThe exciting thing is, all you need are the people. If you could
+attract a critical mass of nerds and investors to live somewhere,
+you could reproduce Silicon Valley. And both groups are highly
+mobile. They'll go where life is good. So what makes a place good
+to them?What nerds like is other nerds. Smart people will go wherever other
+smart people are. And in particular, to great universities. In
+theory there could be other ways to attract them, but so far
+universities seem to be indispensable. Within the US, there are
+no technology hubs without first-rate universities-- or at least,
+first-rate computer science departments.So if you want to make a silicon valley, you not only need a
+university, but one of the top handful in the world. It has to be
+good enough to act as a magnet, drawing the best people from thousands
+of miles away. And that means it has to stand up to existing magnets
+like MIT and Stanford.This sounds hard. Actually it might be easy. My professor friends,
+when they're deciding where they'd like to work, consider one thing
+above all: the quality of the other faculty. What attracts professors
+is good colleagues. So if you managed to recruit, en masse, a
+significant number of the best young researchers, you could create
+a first-rate university from nothing overnight. And you could do
+that for surprisingly little. If you paid 200 people hiring bonuses
+of $3 million apiece, you could put together a faculty that would
+bear comparison with any in the world. And from that point the
+chain reaction would be self-sustaining. So whatever it costs to
+establish a mediocre university, for an additional half billion or
+so you could have a great one.
+Random number is 23.
+[3]PersonalityHowever, merely creating a new university would not be enough to
+start a silicon valley. The university is just the seed. It has
+to be planted in the right soil, or it won't germinate. Plant it
+in the wrong place, and you just create Carnegie-Mellon.To spawn startups, your university has to be in a town that has
+attractions other than the university. It has to be a place where
+investors want to live, and students want to stay after they graduate.The two like much the same things, because most startup investors
+are nerds themselves. So what do nerds look for in a town? Their
+tastes aren't completely different from other people's, because a
+lot of the towns they like most in the US are also big tourist
+destinations: San Francisco, Boston, Seattle. But their tastes
+can't be quite mainstream either, because they dislike other big
+tourist destinations, like New York, Los Angeles, and Las Vegas.There has been a lot written lately about the "creative class." The
+thesis seems to be that as wealth derives increasingly from ideas,
+cities will prosper only if they attract those who have them. That
+is certainly true; in fact it was the basis of Amsterdam's prosperity
+400 years ago.A lot of nerd tastes they share with the creative class in general.
+For example, they like well-preserved old neighborhoods instead of
+cookie-cutter suburbs, and locally-owned shops and restaurants
+instead of national chains. Like the rest of the creative class,
+they want to live somewhere with personality.What exactly is personality? I think it's the feeling that each
+building is the work of a distinct group of people. A town with
+personality is one that doesn't feel mass-produced. So if you want
+to make a startup hub-- or any town to attract the "creative class"--
+you probably have to ban large development projects.
+When a large tract has been developed by a single organization, you
+can always tell.
+[4]Most towns with personality are old, but they don't have to be.
+Old towns have two advantages: they're denser, because they were
+laid out before cars, and they're more varied, because they were
+built one building at a time. You could have both now. Just have
+building codes that ensure density, and ban large scale developments.A corollary is that you have to keep out the biggest developer of
+all: the government. A government that asks "How can we build a
+silicon valley?" has probably ensured failure by the way they framed
+the question. You don't build a silicon valley; you let one grow.NerdsIf you want to attract nerds, you need more than a town with
+personality. You need a town with the right personality. Nerds
+are a distinct subset of the creative class, with different tastes
+from the rest. You can see this most clearly in New York, which
+attracts a lot of creative people, but few nerds.
+[5]What nerds like is the kind of town where people walk around smiling.
+This excludes LA, where no one walks at all, and also New York,
+where people walk, but not smiling. When I was in grad school in
+Boston, a friend came to visit from New York. On the subway back
+from the airport she asked "Why is everyone smiling?" I looked and
+they weren't smiling. They just looked like they were compared to
+the facial expressions she was used to.If you've lived in New York, you know where these facial expressions
+come from. It's the kind of place where your mind may be excited,
+but your body knows it's having a bad time. People don't so much
+enjoy living there as endure it for the sake of the excitement.
+And if you like certain kinds of excitement, New York is incomparable.
+It's a hub of glamour, a magnet for all the shorter half-life
+isotopes of style and fame.Nerds don't care about glamour, so to them the appeal of New York
+is a mystery. People who like New York will pay a fortune for a
+small, dark, noisy apartment in order to live in a town where the
+cool people are really cool. A nerd looks at that deal and sees
+only: pay a fortune for a small, dark, noisy apartment.Nerds will pay a premium to live in a town where the smart people
+are really smart, but you don't have to pay as much for that. It's
+supply and demand: glamour is popular, so you have to pay a lot for
+it.Most nerds like quieter pleasures. They like cafes instead of
+clubs; used bookshops instead of fashionable clothing shops; hiking
+instead of dancing; sunlight instead of tall buildings. A nerd's
+idea of paradise is Berkeley or Boulder.YouthIt's the young nerds who start startups, so it's those specifically
+the city has to appeal to. The startup hubs in the US are all
+young-feeling towns. This doesn't mean they have to be new.
+Cambridge has the oldest town plan in America, but it feels young
+because it's full of students.What you can't have, if you want to create a silicon valley, is a
+large, existing population of stodgy people. It would be a waste
+of time to try to reverse the fortunes of a declining industrial town
+like Detroit or Philadelphia by trying to encourage startups. Those
+places have too much momentum in the wrong direction. You're better
+off starting with a blank slate in the form of a small town. Or
+better still, if there's a town young people already flock to, that
+one.The Bay Area was a magnet for the young and optimistic for decades
+before it was associated with technology. It was a place people
+went in search of something new. And so it became synonymous with
+California nuttiness. There's still a lot of that there. If you
+wanted to start a new fad-- a new way to focus one's "energy," for
+example, or a new category of things not to eat-- the Bay Area would
+be the place to do it. But a place that tolerates oddness in the
+search for the new is exactly what you want in a startup hub, because
+economically that's what startups are. Most good startup ideas
+seem a little crazy; if they were obviously good ideas, someone
+would have done them already. (How many people are going to want computers in their houses?
+What, another search engine?) That's the connection between technology and liberalism. Without
+exception the high-tech cities in the US are also the most liberal.
+But it's not because liberals are smarter that this is so. It's
+because liberal cities tolerate odd ideas, and smart people by
+definition have odd ideas.Conversely, a town that gets praised for being "solid" or representing
+"traditional values" may be a fine place to live, but it's never
+going to succeed as a startup hub. The 2004 presidential election,
+though a disaster in other respects, conveniently supplied us with
+a county-by-county
+map of such places.
+[6]To attract the young, a town must have an intact center. In most
+American cities the center has been abandoned, and the growth, if
+any, is in the suburbs. Most American cities have been turned
+inside out. But none of the startup hubs has: not San Francisco,
+or Boston, or Seattle. They all have intact centers.
+[7]
+My guess is that no city with a dead center could be turned into a
+startup hub. Young people don't want to live in the suburbs.Within the US, the two cities I think could most easily be turned
+into new silicon valleys are Boulder and Portland. Both have the
+kind of effervescent feel that attracts the young. They're each
+only a great university short of becoming a silicon valley, if they
+wanted to.TimeA great university near an attractive town. Is that all it takes?
+That was all it took to make the original Silicon Valley. Silicon
+Valley traces its origins to William Shockley, one of the inventors
+of the transistor. He did the research that won him the Nobel Prize
+at Bell Labs, but when he started his own company in 1956 he moved
+to Palo Alto to do it. At the time that was an odd thing to do.
+Why did he? Because he had grown up there and remembered how nice
+it was. Now Palo Alto is suburbia, but then it was a charming
+college town-- a charming college town with perfect weather and San
+Francisco only an hour away.The companies that rule Silicon Valley now are all descended in
+various ways from Shockley Semiconductor. Shockley was a difficult
+man, and in 1957 his top people-- "the traitorous eight"-- left to
+start a new company, Fairchild Semiconductor. Among them were
+Gordon Moore and Robert Noyce, who went on to found Intel, and
+Eugene Kleiner, who founded the VC firm Kleiner Perkins. Forty-two
+years later, Kleiner Perkins funded Google, and the partner responsible
+for the deal was John Doerr, who came to Silicon Valley in 1974 to
+work for Intel.So although a lot of the newest companies in Silicon Valley don't
+make anything out of silicon, there always seem to be multiple links
+back to Shockley. There's a lesson here: startups beget startups.
+People who work for startups start their own. People who get rich
+from startups fund new ones. I suspect this kind of organic growth
+is the only way to produce a startup hub, because it's the only way
+to grow the expertise you need.That has two important implications. The first is that you need
+time to grow a silicon valley. The university you could create in
+a couple years, but the startup community around it has to grow
+organically. The cycle time is limited by the time it takes a
+company to succeed, which probably averages about five years.The other implication of the organic growth hypothesis is that you
+can't be somewhat of a startup hub. You either have a self-sustaining
+chain reaction, or not. Observation confirms this too: cities
+either have a startup scene, or they don't. There is no middle
+ground. Chicago has the third largest metropolitan area in America.
+As a source of startups it's negligible compared to Seattle, number 15. The good news is that the initial seed can be quite small. Shockley
+Semiconductor, though itself not very successful, was big enough.
+It brought a critical mass of experts in an important new technology
+together in a place they liked enough to stay.CompetingOf course, a would-be silicon valley faces an obstacle the original
+one didn't: it has to compete with Silicon Valley. Can that be
+done? Probably.One of Silicon Valley's biggest advantages is its venture capital
+firms. This was not a factor in Shockley's day, because VC funds
+didn't exist. In fact, Shockley Semiconductor and Fairchild
+Semiconductor were not startups at all in our sense. They were
+subsidiaries-- of Beckman Instruments and Fairchild Camera and
+Instrument respectively. Those companies were apparently willing
+to establish subsidiaries wherever the experts wanted to live.Venture investors, however, prefer to fund startups within an hour's
+drive. For one, they're more likely to notice startups nearby.
+But when they do notice startups in other towns they prefer them
+to move. They don't want to have to travel to attend board meetings,
+and in any case the odds of succeeding are higher in a startup hub.The centralizing effect of venture firms is a double one: they cause
+startups to form around them, and those draw in more startups through
+acquisitions. And although the first may be weakening because it's
+now so cheap to start some startups, the second seems as strong as ever.
+Three of the most admired
+"Web 2.0" companies were started outside the usual startup hubs,
+but two of them have already been reeled in through acquisitions.Such centralizing forces make it harder for new silicon valleys to
+get started. But by no means impossible. Ultimately power rests
+with the founders. A startup with the best people will beat one
+with funding from famous VCs, and a startup that was sufficiently
+successful would never have to move. So a town that
+could exert enough pull over the right people could resist and
+perhaps even surpass Silicon Valley.For all its power, Silicon Valley has a great weakness: the paradise
+Shockley found in 1956 is now one giant parking lot. San Francisco
+and Berkeley are great, but they're forty miles away. Silicon
+Valley proper is soul-crushing suburban sprawl. It
+has fabulous weather, which makes it significantly better than the
+soul-crushing sprawl of most other American cities. But a competitor
+that managed to avoid sprawl would have real leverage. All a city
+needs is to be the kind of place the next traitorous eight look at
+and say "I want to stay here," and that would be enough to get the
+chain reaction started. Notes [1]
+It's interesting to consider how low this number could be
+made. I suspect five hundred would be enough, even if they could
+bring no assets with them. Probably just thirty, if I could pick them,
+would be enough to turn Buffalo into a significant startup hub.[2]
+Bureaucrats manage to allocate research funding moderately
+well, but only because (like an in-house VC fund) they outsource
+most of the work of selection. A professor at a famous university
+who is highly regarded by his peers will get funding, pretty much
+regardless of the proposal. That wouldn't work for startups, whose
+founders aren't sponsored by organizations, and are often unknowns.[3]
+You'd have to do it all at once, or at least a whole department
+at a time, because people would be more likely to come if they
+knew their friends were. And you should probably start from scratch,
+rather than trying to upgrade an existing university, or much energy
+would be lost in friction.[4]
+Hypothesis: Any plan in which multiple independent buildings
+are gutted or demolished to be "redeveloped" as a single project
+is a net loss of personality for the city, with the exception of
+the conversion of buildings not previously public, like warehouses.[5]
+A few startups get started in New York, but less
+than a tenth as many per capita as in Boston, and mostly
+in less nerdy fields like finance and media.[6]
+Some blue counties are false positives (reflecting the
+remaining power of Democratic party machines), but there are no
+false negatives. You can safely write off all the red counties.[7]
+Some "urban renewal" experts took a shot at destroying Boston's
+in the 1960s, leaving the area around city hall a bleak wasteland,
+but most neighborhoods successfully resisted them. Thanks to Chris Anderson, Trevor Blackwell, Marc Hedlund,
+Jessica Livingston, Robert Morris, Greg McAdoo, Fred Wilson,
+and Stephen Wolfram for
+reading drafts of this, and to Ed Dumbill for inviting me to speak. (The second part of this talk became Why Startups
+Condense in America.)
+May 2001 (This article was written as a kind of business plan for a
+new language.
+So it is missing (because it takes for granted) the most important
+feature of a good programming language: very powerful abstractions.) A friend of mine once told an eminent operating systems
+expert that he wanted to design a really good
+programming language. The expert told him that it would be a
+waste of time, that programming languages don't become popular
+or unpopular based on their merits, and so no matter how
+good his language was, no one would use it. At least, that
+was what had happened to the language he had designed.What does make a language popular? Do popular
+languages deserve their popularity? Is it worth trying to
+define a good programming language? How would you do it?I think the answers to these questions can be found by looking
+at hackers, and learning what they want. Programming
+languages are for hackers, and a programming language
+is good as a programming language (rather than, say, an
+exercise in denotational semantics or compiler design)
+if and only if hackers like it.1 The Mechanics of PopularityIt's true, certainly, that most people don't choose programming
+languages simply based on their merits. Most programmers are told
+what language to use by someone else. And yet I think the effect
+of such external factors on the popularity of programming languages
+is not as great as it's sometimes thought to be. I think a bigger
+problem is that a hacker's idea of a good programming language is
+not the same as most language designers'.Between the two, the hacker's opinion is the one that matters.
+Programming languages are not theorems. They're tools, designed
+for people, and they have to be designed to suit human strengths
+and weaknesses as much as shoes have to be designed for human feet.
+If a shoe pinches when you put it on, it's a bad shoe, however
+elegant it may be as a piece of sculpture.It may be that the majority of programmers can't tell a good language
+from a bad one. But that's no different with any other tool. It
+doesn't mean that it's a waste of time to try designing a good
+language. Expert hackers
+can tell a good language when they see
+one, and they'll use it. Expert hackers are a tiny minority,
+admittedly, but that tiny minority write all the good software,
+and their influence is such that the rest of the programmers will
+tend to use whatever language they use. Often, indeed, it is not
+merely influence but command: often the expert hackers are the very
+people who, as their bosses or faculty advisors, tell the other
+programmers what language to use.The opinion of expert hackers is not the only force that determines
+the relative popularity of programming languages — legacy software
+(Cobol) and hype (Ada, Java) also play a role — but I think it is
+the most powerful force over the long term. Given an initial critical
+mass and enough time, a programming language probably becomes about
+as popular as it deserves to be. And popularity further separates
+good languages from bad ones, because feedback from real live users
+always leads to improvements. Look at how much any popular language
+has changed during its life. Perl and Fortran are extreme cases,
+but even Lisp has changed a lot. Lisp 1.5 didn't have macros, for
+example; these evolved later, after hackers at MIT had spent a
+couple years using Lisp to write real programs. [1]So whether or not a language has to be good to be popular, I think
+a language has to be popular to be good. And it has to stay popular
+to stay good. The state of the art in programming languages doesn't
+stand still. And yet the Lisps we have today are still pretty much
+what they had at MIT in the mid-1980s, because that's the last time
+Lisp had a sufficiently large and demanding user base.Of course, hackers have to know about a language before they can
+use it. How are they to hear? From other hackers. But there has to
+be some initial group of hackers using the language for others even
+to hear about it. I wonder how large this group has to be; how many
+users make a critical mass? Off the top of my head, I'd say twenty.
+If a language had twenty separate users, meaning twenty users who
+decided on their own to use it, I'd consider it to be real.Getting there can't be easy. I would not be surprised if it is
+harder to get from zero to twenty than from twenty to a thousand.
+The best way to get those initial twenty users is probably to use
+a trojan horse: to give people an application they want, which
+happens to be written in the new language.2 External FactorsLet's start by acknowledging one external factor that does affect
+the popularity of a programming language. To become popular, a
+programming language has to be the scripting language of a popular
+system. Fortran and Cobol were the scripting languages of early
+IBM mainframes. C was the scripting language of Unix, and so, later,
+was Perl. Tcl is the scripting language of Tk. Java and Javascript
+are intended to be the scripting languages of web browsers.Lisp is not a massively popular language because it is not the
+scripting language of a massively popular system. What popularity
+it retains dates back to the 1960s and 1970s, when it was the
+scripting language of MIT. A lot of the great programmers of the
+day were associated with MIT at some point. And in the early 1970s,
+before C, MIT's dialect of Lisp, called MacLisp, was one of the
+only programming languages a serious hacker would want to use.Today Lisp is the scripting language of two moderately popular
+systems, Emacs and Autocad, and for that reason I suspect that most
+of the Lisp programming done today is done in Emacs Lisp or AutoLisp.Programming languages don't exist in isolation. To hack is a
+transitive verb — hackers are usually hacking something — and in
+practice languages are judged relative to whatever they're used to
+hack. So if you want to design a popular language, you either have
+to supply more than a language, or you have to design your language
+to replace the scripting language of some existing system.Common Lisp is unpopular partly because it's an orphan. It did
+originally come with a system to hack: the Lisp Machine. But Lisp
+Machines (along with parallel computers) were steamrollered by the
+increasing power of general purpose processors in the 1980s. Common
+Lisp might have remained popular if it had been a good scripting
+language for Unix. It is, alas, an atrociously bad one.One way to describe this situation is to say that a language isn't
+judged on its own merits. Another view is that a programming language
+really isn't a programming language unless it's also the scripting
+language of something. This only seems unfair if it comes as a
+surprise. I think it's no more unfair than expecting a programming
+language to have, say, an implementation. It's just part of what
+a programming language is.A programming language does need a good implementation, of course,
+and this must be free. Companies will pay for software, but individual
+hackers won't, and it's the hackers you need to attract.A language also needs to have a book about it. The book should be
+thin, well-written, and full of good examples. K&R is the ideal
+here. At the moment I'd almost say that a language has to have a
+book published by O'Reilly. That's becoming the test of mattering
+to hackers.There should be online documentation as well. In fact, the book
+can start as online documentation. But I don't think that physical
+books are outmoded yet. Their format is convenient, and the de
+facto censorship imposed by publishers is a useful if imperfect
+filter. Bookstores are one of the most important places for learning
+about new languages.3 BrevityGiven that you can supply the three things any language needs — a
+free implementation, a book, and something to hack — how do you
+make a language that hackers will like?One thing hackers like is brevity. Hackers are lazy, in the same
+way that mathematicians and modernist architects are lazy: they
+hate anything extraneous. It would not be far from the truth to
+say that a hacker about to write a program decides what language
+to use, at least subconsciously, based on the total number of
+characters he'll have to type. If this isn't precisely how hackers
+think, a language designer would do well to act as if it were.It is a mistake to try to baby the user with long-winded expressions
+that are meant to resemble English. Cobol is notorious for this
+flaw. A hacker would consider being asked to write "add x to y giving z" instead of "z = x+y" as something between an insult to his intelligence and a sin against
+God. It has sometimes been said that Lisp should use first and rest
+instead of car and cdr, because it would make programs easier to
+read. Maybe for the first couple hours. But a hacker can learn
+quickly enough that car means the first element of a list and cdr
+means the rest. Using first and rest means 50% more typing. And
+they are also different lengths, meaning that the arguments won't
+line up when they're called, as car and cdr often are, in successive
+lines. I've found that it matters a lot how code lines up on the
+page. I can barely read Lisp code when it is set in a variable-width
+font, and friends say this is true for other languages too.Brevity is one place where strongly typed languages lose. All other
+things being equal, no one wants to begin a program with a bunch
+of declarations. Anything that can be implicit, should be.The individual tokens should be short as well. Perl and Common Lisp
+occupy opposite poles on this question. Perl programs can be almost
+cryptically dense, while the names of built-in Common Lisp operators
+are comically long. The designers of Common Lisp probably expected
+users to have text editors that would type these long names for
+them. But the cost of a long name is not just the cost of typing
+it. There is also the cost of reading it, and the cost of the space
+it takes up on your screen.4 HackabilityThere is one thing more important than brevity to a hacker: being
+able to do what you want. In the history of programming languages
+a surprising amount of effort has gone into preventing programmers
+from doing things considered to be improper. This is a dangerously
+presumptuous plan. How can the language designer know what the
+programmer is going to need to do? I think language designers would
+do better to consider their target user to be a genius who will
+need to do things they never anticipated, rather than a bumbler
+who needs to be protected from himself. The bumbler will shoot
+himself in the foot anyway. You may save him from referring to
+variables in another package, but you can't save him from writing
+a badly designed program to solve the wrong problem, and taking
+forever to do it.Good programmers often want to do dangerous and unsavory things.
+By unsavory I mean things that go behind whatever semantic facade
+the language is trying to present: getting hold of the internal
+representation of some high-level abstraction, for example. Hackers
+like to hack, and hacking means getting inside things and second
+guessing the original designer.Let yourself be second guessed. When you make any tool, people use
+it in ways you didn't intend, and this is especially true of a
+highly articulated tool like a programming language. Many a hacker
+will want to tweak your semantic model in a way that you never
+imagined. I say, let them; give the programmer access to as much
+internal stuff as you can without endangering runtime systems like
+the garbage collector.In Common Lisp I have often wanted to iterate through the fields
+of a struct — to comb out references to a deleted object, for example,
+or find fields that are uninitialized. I know the structs are just
+vectors underneath. And yet I can't write a general purpose function
+that I can call on any struct. I can only access the fields by
+name, because that's what a struct is supposed to mean.A hacker may only want to subvert the intended model of things once
+or twice in a big program. But what a difference it makes to be
+able to. And it may be more than a question of just solving a
+problem. There is a kind of pleasure here too. Hackers share the
+surgeon's secret pleasure in poking about in gross innards, the
+teenager's secret pleasure in popping zits. [2] For boys, at least,
+certain kinds of horrors are fascinating. Maxim magazine publishes
+an annual volume of photographs, containing a mix of pin-ups and
+grisly accidents. They know their audience.Historically, Lisp has been good at letting hackers have their way.
+The political correctness of Common Lisp is an aberration. Early
+Lisps let you get your hands on everything. A good deal of that
+spirit is, fortunately, preserved in macros. What a wonderful thing,
+to be able to make arbitrary transformations on the source code.Classic macros are a real hacker's tool — simple, powerful, and
+dangerous. It's so easy to understand what they do: you call a
+function on the macro's arguments, and whatever it returns gets
+inserted in place of the macro call. Hygienic macros embody the
+opposite principle. They try to protect you from understanding what
+they're doing. I have never heard hygienic macros explained in one
+sentence. And they are a classic example of the dangers of deciding
+what programmers are allowed to want. Hygienic macros are intended
+to protect me from variable capture, among other things, but variable
+capture is exactly what I want in some macros.A really good language should be both clean and dirty: cleanly
+designed, with a small core of well understood and highly orthogonal
+operators, but dirty in the sense that it lets hackers have their
+way with it. C is like this. So were the early Lisps. A real hacker's
+language will always have a slightly raffish character.A good programming language should have features that make the kind
+of people who use the phrase "software engineering" shake their
+heads disapprovingly. At the other end of the continuum are languages
+like Ada and Pascal, models of propriety that are good for teaching
+and not much else.
+
+5 Throwaway Programs
+
+To be attractive to hackers, a language must be good for writing
+the kinds of programs they want to write. And that means, perhaps
+surprisingly, that it has to be good for writing throwaway programs.A throwaway program is a program you write quickly for some limited
+task: a program to automate some system administration task, or
+generate test data for a simulation, or convert data from one format
+to another. The surprising thing about throwaway programs is that,
+like the "temporary" buildings built at so many American universities
+during World War II, they often don't get thrown away. Many evolve
+into real programs, with real features and real users.I have a hunch that the best big programs begin life this way,
+rather than being designed big from the start, like the Hoover Dam.
+It's terrifying to build something big from scratch. When people
+take on a project that's too big, they become overwhelmed. The
+project either gets bogged down, or the result is sterile and
+wooden: a shopping mall rather than a real downtown, Brasilia rather
+than Rome, Ada rather than C.Another way to get a big program is to start with a throwaway
+program and keep improving it. This approach is less daunting, and
+the design of the program benefits from evolution. I think, if one
+looked, that this would turn out to be the way most big programs
+were developed. And those that did evolve this way are probably
+still written in whatever language they were first written in,
+because it's rare for a program to be ported, except for political
+reasons. And so, paradoxically, if you want to make a language that
+is used for big systems, you have to make it good for writing
+throwaway programs, because that's where big systems come from.Perl is a striking example of this idea. It was not only designed
+for writing throwaway programs, but was pretty much a throwaway
+program itself. Perl began life as a collection of utilities for
+generating reports, and only evolved into a programming language
+as the throwaway programs people wrote in it grew larger. It was
+not until Perl 5 (if then) that the language was suitable for
+writing serious programs, and yet it was already massively popular.What makes a language good for throwaway programs? To start with,
+it must be readily available. A throwaway program is something that
+you expect to write in an hour. So the language probably must
+already be installed on the computer you're using. It can't be
+something you have to install before you use it. It has to be there.
+C was there because it came with the operating system. Perl was
+there because it was originally a tool for system administrators,
+and yours had already installed it.Being available means more than being installed, though. An
+interactive language, with a command-line interface, is more
+available than one that you have to compile and run separately. A
+popular programming language should be interactive, and start up
+fast.Another thing you want in a throwaway program is brevity. Brevity
+is always attractive to hackers, and never more so than in a program
+they expect to turn out in an hour.
+
+6 Libraries
+
+Of course the ultimate in brevity is to have the program already
+written for you, and merely to call it. And this brings us to what
+I think will be an increasingly important feature of programming
+languages: library functions. Perl wins because it has large
+libraries for manipulating strings. This class of library functions
+is especially important for throwaway programs, which are often
+originally written for converting or extracting data. Many Perl
+programs probably begin as just a couple library calls stuck
+together.I think a lot of the advances that happen in programming languages
+in the next fifty years will have to do with library functions. I
+think future programming languages will have libraries that are as
+carefully designed as the core language. Programming language design
+will not be about whether to make your language strongly or weakly
+typed, or object oriented, or functional, or whatever, but about
+how to design great libraries. The kind of language designers who
+like to think about how to design type systems may shudder at this.
+It's almost like writing applications! Too bad. Languages are for
+programmers, and libraries are what programmers need.It's hard to design good libraries. It's not simply a matter of
+writing a lot of code. Once the libraries get too big, it can
+sometimes take longer to find the function you need than to write
+the code yourself. Libraries need to be designed using a small set
+of orthogonal operators, just like the core language. It ought to
+be possible for the programmer to guess what library call will do
+what he needs.Libraries are one place Common Lisp falls short. There are only
+rudimentary libraries for manipulating strings, and almost none
+for talking to the operating system. For historical reasons, Common
+Lisp tries to pretend that the OS doesn't exist. And because you
+can't talk to the OS, you're unlikely to be able to write a serious
+program using only the built-in operators in Common Lisp. You have
+to use some implementation-specific hacks as well, and in practice
+these tend not to give you everything you want. Hackers would think
+a lot more highly of Lisp if Common Lisp had powerful string
+libraries and good OS support.
+
+7 Syntax
+
+Could a language with Lisp's syntax, or more precisely, lack of
+syntax, ever become popular? I don't know the answer to this
+question. I do think that syntax is not the main reason Lisp isn't
+currently popular. Common Lisp has worse problems than unfamiliar
+syntax. I know several programmers who are comfortable with prefix
+syntax and yet use Perl by default, because it has powerful string
+libraries and can talk to the OS.
+
+There are two possible problems with prefix notation: that it is
+unfamiliar to programmers, and that it is not dense enough. The
+conventional wisdom in the Lisp world is that the first problem is
+the real one. I'm not so sure. Yes, prefix notation makes ordinary
+programmers panic. But I don't think ordinary programmers' opinions
+matter. Languages become popular or unpopular based on what expert
+hackers think of them, and I think expert hackers might be able to
+deal with prefix notation. Perl syntax can be pretty incomprehensible,
+but that has not stood in the way of Perl's popularity. If anything
+it may have helped foster a Perl cult.A more serious problem is the diffuseness of prefix notation. For
+expert hackers, that really is a problem. No one wants to write
+(aref a x y) when they could write a[x,y].In this particular case there is a way to finesse our way out of
+the problem. If we treat data structures as if they were functions
+on indexes, we could write (a x y) instead, which is even shorter
+than the Perl form. Similar tricks may shorten other types of
+expressions.We can get rid of (or make optional) a lot of parentheses by making
+indentation significant. That's how programmers read code anyway:
+when indentation says one thing and delimiters say another, we go
+by the indentation. Treating indentation as significant would
+eliminate this common source of bugs as well as making programs
+shorter.Sometimes infix syntax is easier to read. This is especially true
+for math expressions. I've used Lisp my whole programming life and
+I still don't find prefix math expressions natural. And yet it is
+convenient, especially when you're generating code, to have operators
+that take any number of arguments. So if we do have infix syntax,
+it should probably be implemented as some kind of read-macro.I don't think we should be religiously opposed to introducing syntax
+into Lisp, as long as it translates in a well-understood way into
+underlying s-expressions. There is already a good deal of syntax
+in Lisp. It's not necessarily bad to introduce more, as long as no
+one is forced to use it. In Common Lisp, some delimiters are reserved
+for the language, suggesting that at least some of the designers
+intended to have more syntax in the future.One of the most egregiously unlispy pieces of syntax in Common Lisp
+occurs in format strings; format is a language in its own right,
+and that language is not Lisp. If there were a plan for introducing
+more syntax into Lisp, format specifiers might be able to be included
+in it. It would be a good thing if macros could generate format
+specifiers the way they generate any other kind of code.An eminent Lisp hacker told me that his copy of CLTL falls open to
+the section format. Mine too. This probably indicates room for
+improvement. It may also mean that programs do a lot of I/O.
+
+8 Efficiency
+
+A good language, as everyone knows, should generate fast code. But
+in practice I don't think fast code comes primarily from things
+you do in the design of the language. As Knuth pointed out long
+ago, speed only matters in certain critical bottlenecks. And as
+many programmers have observed since, one is very often mistaken
+about where these bottlenecks are.So, in practice, the way to get fast code is to have a very good
+profiler, rather than by, say, making the language strongly typed.
+You don't need to know the type of every argument in every call in
+the program. You do need to be able to declare the types of arguments
+in the bottlenecks. And even more, you need to be able to find out
+where the bottlenecks are.One complaint people have had with Lisp is that it's hard to tell
+what's expensive. This might be true. It might also be inevitable,
+if you want to have a very abstract language. And in any case I
+think good profiling would go a long way toward fixing the problem:
+you'd soon learn what was expensive.Part of the problem here is social. Language designers like to
+write fast compilers. That's how they measure their skill. They
+think of the profiler as an add-on, at best. But in practice a good
+profiler may do more to improve the speed of actual programs written
+in the language than a compiler that generates fast code. Here,
+again, language designers are somewhat out of touch with their
+users. They do a really good job of solving slightly the wrong
+problem.It might be a good idea to have an active profiler — to push
+performance data to the programmer instead of waiting for him to
+come asking for it. For example, the editor could display bottlenecks
+in red when the programmer edits the source code. Another approach
+would be to somehow represent what's happening in running programs.
+This would be an especially big win in server-based applications,
+where you have lots of running programs to look at. An active
+profiler could show graphically what's happening in memory as a
+program's running, or even make sounds that tell what's happening.Sound is a good cue to problems. In one place I worked, we had a
+big board of dials showing what was happening to our web servers.
+The hands were moved by little servomotors that made a slight noise
+when they turned. I couldn't see the board from my desk, but I
+found that I could tell immediately, by the sound, when there was
+a problem with a server.It might even be possible to write a profiler that would automatically
+detect inefficient algorithms. I would not be surprised if certain
+patterns of memory access turned out to be sure signs of bad
+algorithms. If there were a little guy running around inside the
+computer executing our programs, he would probably have as long
+and plaintive a tale to tell about his job as a federal government
+employee. I often have a feeling that I'm sending the processor on
+a lot of wild goose chases, but I've never had a good way to look
+at what it's doing.A number of Lisps now compile into byte code, which is then executed
+by an interpreter. This is usually done to make the implementation
+easier to port, but it could be a useful language feature. It might
+be a good idea to make the byte code an official part of the
+language, and to allow programmers to use inline byte code in
+bottlenecks. Then such optimizations would be portable too.The nature of speed, as perceived by the end-user, may be changing.
+With the rise of server-based applications, more and more programs
+may turn out to be i/o-bound. It will be worth making i/o fast.
+The language can help with straightforward measures like simple,
+fast, formatted output functions, and also with deep structural
+changes like caching and persistent objects.Users are interested in response time. But another kind of efficiency
+will be increasingly important: the number of simultaneous users
+you can support per processor. Many of the interesting applications
+written in the near future will be server-based, and the number of
+users per server is the critical question for anyone hosting such
+applications. In the capital cost of a business offering a server-based
+application, this is the divisor.For years, efficiency hasn't mattered much in most end-user
+applications. Developers have been able to assume that each user
+would have an increasingly powerful processor sitting on their
+desk. And by Parkinson's Law, software has expanded to use the
+resources available. That will change with server-based applications.
+In that world, the hardware and software will be supplied together.
+For companies that offer server-based applications, it will make
+a very big difference to the bottom line how many users they can
+support per server.In some applications, the processor will be the limiting factor,
+and execution speed will be the most important thing to optimize.
+But often memory will be the limit; the number of simultaneous
+users will be determined by the amount of memory you need for each
+user's data. The language can help here too. Good support for
+threads will enable all the users to share a single heap. It may
+also help to have persistent objects and/or language level support
+for lazy loading.
+
+9 Time
+
+The last ingredient a popular language needs is time. No one wants
+to write programs in a language that might go away, as so many
+programming languages do. So most hackers will tend to wait until
+a language has been around for a couple years before even considering
+using it.Inventors of wonderful new things are often surprised to discover
+this, but you need time to get any message through to people. A
+friend of mine rarely does anything the first time someone asks
+him. He knows that people sometimes ask for things that they turn
+out not to want. To avoid wasting his time, he waits till the third
+or fourth time he's asked to do something; by then, whoever's asking
+him may be fairly annoyed, but at least they probably really do
+want whatever they're asking for.Most people have learned to do a similar sort of filtering on new
+things they hear about. They don't even start paying attention
+until they've heard about something ten times. They're perfectly
+justified: the majority of hot new whatevers do turn out to be a
+waste of time, and eventually go away. By delaying learning VRML,
+I avoided having to learn it at all.So anyone who invents something new has to expect to keep repeating
+their message for years before people will start to get it. We
+wrote what was, as far as I know, the first web-server based
+application, and it took us years to get it through to people that
+it didn't have to be downloaded. It wasn't that they were stupid.
+They just had us tuned out.The good news is, simple repetition solves the problem. All you
+have to do is keep telling your story, and eventually people will
+start to hear. It's not when people notice you're there that they
+pay attention; it's when they notice you're still there.It's just as well that it usually takes a while to gain momentum.
+Most technologies evolve a good deal even after they're first
+launched — programming languages especially. Nothing could be better,
+for a new technology, than a few years of being used only by a small
+number of early adopters. Early adopters are sophisticated and
+demanding, and quickly flush out whatever flaws remain in your
+technology. When you only have a few users you can be in close
+contact with all of them. And early adopters are forgiving when
+you improve your system, even if this causes some breakage.There are two ways new technology gets introduced: the organic
+growth method, and the big bang method. The organic growth method
+is exemplified by the classic seat-of-the-pants underfunded garage
+startup. A couple guys, working in obscurity, develop some new
+technology. They launch it with no marketing and initially have
+only a few (fanatically devoted) users. They continue to improve
+the technology, and meanwhile their user base grows by word of
+mouth. Before they know it, they're big.The other approach, the big bang method, is exemplified by the
+VC-backed, heavily marketed startup. They rush to develop a product,
+launch it with great publicity, and immediately (they hope) have
+a large user base.Generally, the garage guys envy the big bang guys. The big bang
+guys are smooth and confident and respected by the VCs. They can
+afford the best of everything, and the PR campaign surrounding the
+launch has the side effect of making them celebrities. The organic
+growth guys, sitting in their garage, feel poor and unloved. And
+yet I think they are often mistaken to feel sorry for themselves.
+Organic growth seems to yield better technology and richer founders
+than the big bang method. If you look at the dominant technologies
+today, you'll find that most of them grew organically.This pattern doesn't only apply to companies. You see it in sponsored
+research too. Multics and Common Lisp were big-bang projects, and
+Unix and MacLisp were organic growth projects.
+
+10 Redesign
+
+"The best writing is rewriting," wrote E. B. White. Every good
+writer knows this, and it's true for software too. The most important
+part of design is redesign. Programming languages, especially,
+don't get redesigned enough.To write good software you must simultaneously keep two opposing
+ideas in your head. You need the young hacker's naive faith in
+his abilities, and at the same time the veteran's skepticism. You
+have to be able to think "how hard can it be?" with one half of
+your brain while thinking "it will never work" with the other.
+
+The trick is to realize that there's no real contradiction here.
+You want to be optimistic and skeptical about two different things.
+You have to be optimistic about the possibility of solving the
+problem, but skeptical about the value of whatever solution you've
+got so far.People who do good work often think that whatever they're working
+on is no good. Others see what they've done and are full of wonder,
+but the creator is full of worry. This pattern is no coincidence:
+it is the worry that made the work good.If you can keep hope and worry balanced, they will drive a project
+forward the same way your two legs drive a bicycle forward. In the
+first phase of the two-cycle innovation engine, you work furiously
+on some problem, inspired by your confidence that you'll be able
+to solve it. In the second phase, you look at what you've done in
+the cold light of morning, and see all its flaws very clearly. But
+as long as your critical spirit doesn't outweigh your hope, you'll
+be able to look at your admittedly incomplete system, and think,
+"how hard can it be to get the rest of the way?", thereby continuing
+the cycle.
+
+It's tricky to keep the two forces balanced. In young hackers,
+optimism predominates. They produce something, are convinced it's
+great, and never improve it. In old hackers, skepticism predominates,
+and they won't even dare to take on ambitious projects.Anything you can do to keep the redesign cycle going is good. Prose
+can be rewritten over and over until you're happy with it. But
+software, as a rule, doesn't get redesigned enough. Prose has
+readers, but software has users. If a writer rewrites an essay,
+people who read the old version are unlikely to complain that their
+thoughts have been broken by some newly introduced incompatibility.Users are a double-edged sword. They can help you improve your
+language, but they can also deter you from improving it. So choose
+your users carefully, and be slow to grow their number. Having
+users is like optimization: the wise course is to delay it. Also,
+as a general rule, you can at any given time get away with changing
+more than you think. Introducing change is like pulling off a
+bandage: the pain is a memory almost as soon as you feel it.Everyone knows that it's not a good idea to have a language designed
+by a committee. Committees yield bad design. But I think the worst
+danger of committees is that they interfere with redesign. It is
+so much work to introduce changes that no one wants to bother.
+Whatever a committee decides tends to stay that way, even if most
+of the members don't like it.Even a committee of two gets in the way of redesign. This happens
+particularly in the interfaces between pieces of software written
+by two different people. To change the interface both have to agree
+to change it at once. And so interfaces tend not to change at all,
+which is a problem because they tend to be one of the most ad hoc
+parts of any system.One solution here might be to design systems so that interfaces
+are horizontal instead of vertical — so that modules are always
+vertically stacked strata of abstraction. Then the interface will
+tend to be owned by one of them. The lower of two levels will either
+be a language in which the upper is written, in which case the
+lower level will own the interface, or it will be a slave, in which
+case the interface can be dictated by the upper level.
+
+11 Lisp
+
+What all this implies is that there is hope for a new Lisp. There
+is hope for any language that gives hackers what they want, including
+Lisp. I think we may have made a mistake in thinking that hackers
+are turned off by Lisp's strangeness. This comforting illusion may
+have prevented us from seeing the real problem with Lisp, or at
+least Common Lisp, which is that it sucks for doing what hackers
+want to do. A hacker's language needs powerful libraries and
+something to hack. Common Lisp has neither. A hacker's language is
+terse and hackable. Common Lisp is not.The good news is, it's not Lisp that sucks, but Common Lisp. If we
+can develop a new Lisp that is a real hacker's language, I think
+hackers will use it. They will use whatever language does the job.
+All we have to do is make sure this new Lisp does some important
+job better than other languages.History offers some encouragement. Over time, successive new
+programming languages have taken more and more features from Lisp.
+There is no longer much left to copy before the language you've
+made is Lisp. The latest hot language, Python, is a watered-down
+Lisp with infix syntax and no macros. A new Lisp would be a natural
+step in this progression.I sometimes think that it would be a good marketing trick to call
+it an improved version of Python. That sounds hipper than Lisp. To
+many people, Lisp is a slow AI language with a lot of parentheses.
+Fritz Kunze's official biography carefully avoids mentioning the
+L-word. But my guess is that we shouldn't be afraid to call the
+new Lisp Lisp. Lisp still has a lot of latent respect among the
+very best hackers — the ones who took 6.001 and understood it, for
+example. And those are the users you need to win.In "How to Become a Hacker," Eric Raymond describes Lisp as something
+like Latin or Greek — a language you should learn as an intellectual
+exercise, even though you won't actually use it:
+
+ Lisp is worth learning for the profound enlightenment experience
+ you will have when you finally get it; that experience will make
+ you a better programmer for the rest of your days, even if you
+ never actually use Lisp itself a lot.
+
+If I didn't know Lisp, reading this would set me asking questions.
+A language that would make me a better programmer, if it means
+anything at all, means a language that would be better for programming.
+And that is in fact the implication of what Eric is saying.As long as that idea is still floating around, I think hackers will
+be receptive enough to a new Lisp, even if it is called Lisp. But
+this Lisp must be a hacker's language, like the classic Lisps of
+the 1970s. It must be terse, simple, and hackable. And it must have
+powerful libraries for doing what hackers want to do now.In the matter of libraries I think there is room to beat languages
+like Perl and Python at their own game. A lot of the new applications
+that will need to be written in the coming years will be
+server-based
+applications. There's no reason a new Lisp shouldn't have string
+libraries as good as Perl, and if this new Lisp also had powerful
+libraries for server-based applications, it could be very popular.
+Real hackers won't turn up their noses at a new tool that will let
+them solve hard problems with a few library calls. Remember, hackers
+are lazy.It could be an even bigger win to have core language support for
+server-based applications. For example, explicit support for programs
+with multiple users, or data ownership at the level of type tags.Server-based applications also give us the answer to the question
+of what this new Lisp will be used to hack. It would not hurt to
+make Lisp better as a scripting language for Unix. (It would be
+hard to make it worse.) But I think there are areas where existing
+languages would be easier to beat. I think it might be better to
+follow the model of Tcl, and supply the Lisp together with a complete
+system for supporting server-based applications. Lisp is a natural
+fit for server-based applications. Lexical closures provide a way
+to get the effect of subroutines when the ui is just a series of
+web pages. S-expressions map nicely onto html, and macros are good
+at generating it. There need to be better tools for writing
+server-based applications, and there needs to be a new Lisp, and
+the two would work very well together.
+
+12 The Dream Language
+
+By way of summary, let's try describing the hacker's dream language.
+The dream language is
+beautiful, clean, and terse. It has an
+interactive toplevel that starts up fast. You can write programs
+to solve common problems with very little code. Nearly all the
+code in any program you write is code that's specific to your
+application. Everything else has been done for you.The syntax of the language is brief to a fault. You never have to
+type an unnecessary character, or even to use the shift key much.Using big abstractions you can write the first version of a program
+very quickly. Later, when you want to optimize, there's a really
+good profiler that tells you where to focus your attention. You
+can make inner loops blindingly fast, even writing inline byte code
+if you need to.There are lots of good examples to learn from, and the language is
+intuitive enough that you can learn how to use it from examples in
+a couple minutes. You don't need to look in the manual much. The
+manual is thin, and has few warnings and qualifications.The language has a small core, and powerful, highly orthogonal
+libraries that are as carefully designed as the core language. The
+libraries all work well together; everything in the language fits
+together like the parts in a fine camera. Nothing is deprecated,
+or retained for compatibility. The source code of all the libraries
+is readily available. It's easy to talk to the operating system
+and to applications written in other languages.The language is built in layers. The higher-level abstractions are
+built in a very transparent way out of lower-level abstractions,
+which you can get hold of if you want.Nothing is hidden from you that doesn't absolutely have to be. The
+language offers abstractions only as a way of saving you work,
+rather than as a way of telling you what to do. In fact, the language
+encourages you to be an equal participant in its design. You can
+change everything about it, including even its syntax, and anything
+you write has, as much as possible, the same status as what comes
+predefined.
+
+Notes
+
+[1] Macros very close to the modern idea were proposed by Timothy
+Hart in 1964, two years after Lisp 1.5 was released. What was
+missing, initially, were ways to avoid variable capture and multiple
+evaluation; Hart's examples are subject to both.
+
+[2] In "When the Air Hits Your Brain," neurosurgeon Frank Vertosick
+recounts a conversation in which his chief resident, Gary, talks
+about the difference between surgeons and internists ("fleas"):
+
+ Gary and I ordered a large pizza and found an open booth. The
+ chief lit a cigarette. "Look at those goddamn fleas, jabbering
+ about some disease they'll see once in their lifetimes. That's
+ the trouble with fleas, they only like the bizarre stuff. They
+ hate their bread and butter cases. That's the difference between
+ us and the fucking fleas. See, we love big juicy lumbar disc
+ herniations, but they hate hypertension...."
+
+It's hard to think of a lumbar disc herniation as juicy (except
+literally). And yet I think I know what they mean. I've often had
+a juicy bug to track down. Someone who's not a programmer would
+find it hard to imagine that there could be pleasure in a bug.
+Surely it's better if everything just works. In one way, it is.
+And yet there is undeniably a grim satisfaction in hunting down
+certain sorts of bugs.
+```
\ No newline at end of file
diff --git a/out_tensor/lm_head.safetensors b/out_tensor/lm_head.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1f5eb6e3f1e4edb5ad27f4b1394d520be6e6d31e
--- /dev/null
+++ b/out_tensor/lm_head.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:960dd347a1d286416936631b5cc2a316e3e67a3e98b995963e78789e7fb270bd
+size 103953008
diff --git a/out_tensor/model.layers.0.mlp.down_proj.safetensors b/out_tensor/model.layers.0.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..de8141807d53e78d4315cf2af1c5c5541f52483a
--- /dev/null
+++ b/out_tensor/model.layers.0.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4540add4d390fe1c68bc51092927d72ca5c67b3a60e198e0ac39bad37b18020f
+size 39546008
diff --git a/out_tensor/model.layers.0.mlp.gate_proj.safetensors b/out_tensor/model.layers.0.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..206ab1403246690666d9767c07f094be203bfb41
--- /dev/null
+++ b/out_tensor/model.layers.0.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7736ca0fde60e33a5e1c50595a23af5f7b3705c52c897492e3c74df93b6d208a
+size 38380824
diff --git a/out_tensor/model.layers.0.mlp.up_proj.safetensors b/out_tensor/model.layers.0.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2dea2a209c4a5f6b4903a2df392d32080994aa0d
--- /dev/null
+++ b/out_tensor/model.layers.0.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8ef367e27c78dfac1dbca7dca5c34800847151955995a34f971cb4e16d09cc0
+size 39470352
diff --git a/out_tensor/model.layers.0.self_attn.k_proj.safetensors b/out_tensor/model.layers.0.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4147ae97daf57b0d9fc2b16b6f1c529c96febebc
--- /dev/null
+++ b/out_tensor/model.layers.0.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:794e73a6f31b470f15dd2f20f4dddba36445a8d0815f93255f20089b9bf6f47d
+size 3228960
diff --git a/out_tensor/model.layers.0.self_attn.o_proj.safetensors b/out_tensor/model.layers.0.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e31e9fbe2a596174bcd5793d3f5319fc3e801247
--- /dev/null
+++ b/out_tensor/model.layers.0.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2955c47b51e4dc73e99b673a3568dd7430ad85a53e2f90ab96eda2730b87c01f
+size 12862760
diff --git a/out_tensor/model.layers.0.self_attn.q_proj.safetensors b/out_tensor/model.layers.0.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9076fc61554d017a220cab40f977e1aba707ade4
--- /dev/null
+++ b/out_tensor/model.layers.0.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d182602dc5ccfa3ebca7689543861b14a7889bfb22b12b6be099eedbe1ce1673
+size 12862760
diff --git a/out_tensor/model.layers.0.self_attn.v_proj.safetensors b/out_tensor/model.layers.0.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2b8b1655d7ca3c1c656752ae47a74df9b80c9cba
--- /dev/null
+++ b/out_tensor/model.layers.0.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:195c18c8746ba519136fea054587686567bcc5689bfba081b9c23a4dbc1ddb5c
+size 4277536
diff --git a/out_tensor/model.layers.1.mlp.down_proj.safetensors b/out_tensor/model.layers.1.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e6f1608d7eda67b7138af58ce27f9114f5a2e5cb
--- /dev/null
+++ b/out_tensor/model.layers.1.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa8a6b43122334a204d0538b1b27618c8baa5d68211e679de26acba678b47c43
+size 25619608
diff --git a/out_tensor/model.layers.1.mlp.gate_proj.safetensors b/out_tensor/model.layers.1.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ccbbc0ef34e0d3c57de5bb5f4f989f340249b656
--- /dev/null
+++ b/out_tensor/model.layers.1.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fd05d1a2a3aacd4eb855f8bff97e16c46a826029f8bef986184d0f1542652c1
+size 23700760
diff --git a/out_tensor/model.layers.1.mlp.up_proj.safetensors b/out_tensor/model.layers.1.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f5f8a9a0bcc61af1bc3b433ef5db5b319fd91cef
--- /dev/null
+++ b/out_tensor/model.layers.1.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8562c0f410344976ae61582fa65b77d2e9979259b9ee4f9e7e63b28f4c0b844
+size 24790288
diff --git a/out_tensor/model.layers.1.self_attn.k_proj.safetensors b/out_tensor/model.layers.1.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9f81625b2bc416522f4d829bb9b6a9002d6d4f54
--- /dev/null
+++ b/out_tensor/model.layers.1.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:583d93844dfe9fcff32c3acfdb98a14ad2b47e0dc3315aa57fce0c6f61d99ac6
+size 3228960
diff --git a/out_tensor/model.layers.1.self_attn.o_proj.safetensors b/out_tensor/model.layers.1.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..65f9a8d241ff02279885d290322d27355ff0281c
--- /dev/null
+++ b/out_tensor/model.layers.1.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91a20eef7842748ade00466d840a0a25437120c2b134ff5f2decf6463360d605
+size 12862760
diff --git a/out_tensor/model.layers.1.self_attn.q_proj.safetensors b/out_tensor/model.layers.1.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a2e487135727972c048a7d9fab271af90643c96d
--- /dev/null
+++ b/out_tensor/model.layers.1.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ace6e512e1b28ed44e0913fcafda77a8b43d1b53593fd115a04b943e2148fed
+size 12862760
diff --git a/out_tensor/model.layers.1.self_attn.v_proj.safetensors b/out_tensor/model.layers.1.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..29c014871bb1452f790ca917a7fb1956f5c82949
--- /dev/null
+++ b/out_tensor/model.layers.1.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:965179024657ede6735183b5f5f1ed9369083470da5c7349646b65544c9aec73
+size 4277536
diff --git a/out_tensor/model.layers.10.mlp.down_proj.safetensors b/out_tensor/model.layers.10.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..95b2f8b180d42a23ee59a61270fffa3f815c0b84
--- /dev/null
+++ b/out_tensor/model.layers.10.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be065acb17d2b30e5f7fc64f4162852bbb06bdb422a0832a00ce22b70bbcc3ec
+size 25619616
diff --git a/out_tensor/model.layers.10.mlp.gate_proj.safetensors b/out_tensor/model.layers.10.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f2452af738fb77739576ea45fc9547a5232c6d94
--- /dev/null
+++ b/out_tensor/model.layers.10.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f210d34ba8a6dd886422f4fef3e81032742fffbf88f71b69c08a0e14662d0d6c
+size 23700768
diff --git a/out_tensor/model.layers.10.mlp.up_proj.safetensors b/out_tensor/model.layers.10.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a0a32b06a13d09dea0dc3099338da38cbedcc5c0
--- /dev/null
+++ b/out_tensor/model.layers.10.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f1a197f77b4a79a8bc71308293891bdb8ef6239bba3991cf5e0d18c3487c623
+size 24790288
diff --git a/out_tensor/model.layers.10.self_attn.k_proj.safetensors b/out_tensor/model.layers.10.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..099143ba620a39f47ebbdd61578481ac9e17bf34
--- /dev/null
+++ b/out_tensor/model.layers.10.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9690b1ad76e5f0da516d3f849ee5c21958fea43017beea730c0d53d1393f47d0
+size 1156000
diff --git a/out_tensor/model.layers.10.self_attn.o_proj.safetensors b/out_tensor/model.layers.10.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8cb0b67cf25c19fe72df56522159f24eea66ac2b
--- /dev/null
+++ b/out_tensor/model.layers.10.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16551a4f801e0c2805379f180ae7ad8bacd3efe5de9f5744fb3a496dd6ce5efc
+size 4572064
diff --git a/out_tensor/model.layers.10.self_attn.q_proj.safetensors b/out_tensor/model.layers.10.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..636c0d742845448377d09ac7c3be4c13767e135a
--- /dev/null
+++ b/out_tensor/model.layers.10.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d51f74b264dd1b7c98b601ce13dfd062481d480e3251dfb654be339acb1387c5
+size 4572064
diff --git a/out_tensor/model.layers.10.self_attn.v_proj.safetensors b/out_tensor/model.layers.10.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3a0dcb86f8323ba5d1cdf52dc0f1d4120ad1a64f
--- /dev/null
+++ b/out_tensor/model.layers.10.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8e855e22790285dc4c35dcd5a72025138d427221bd28fa16332672eb65be747
+size 1671904
diff --git a/out_tensor/model.layers.11.mlp.down_proj.safetensors b/out_tensor/model.layers.11.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0f556a56e883c1fd993d3c23b7fd7bfabcc84fa5
--- /dev/null
+++ b/out_tensor/model.layers.11.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d59cc1531442d4b0e6e9b83256926c8356041084d1082f630c6caad7399fe3d
+size 32582816
diff --git a/out_tensor/model.layers.11.mlp.gate_proj.safetensors b/out_tensor/model.layers.11.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..698803c37f11dfb588e8bb9d2b2e0e27c98a41f6
--- /dev/null
+++ b/out_tensor/model.layers.11.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:410f2aa5ab1152923279adad567b990bf6f7f9b68c36bb97f8a3a6f1a79f910b
+size 31040800
diff --git a/out_tensor/model.layers.11.mlp.up_proj.safetensors b/out_tensor/model.layers.11.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e69510438799aa56e58849b1a82e12055e22308c
--- /dev/null
+++ b/out_tensor/model.layers.11.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7106ef91edaf3f6e0987e13f957b802da3549469109f7b0e44fd6d7644eb714e
+size 32130320
diff --git a/out_tensor/model.layers.11.self_attn.k_proj.safetensors b/out_tensor/model.layers.11.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d63837413aecf3e5e5cd3a214e34c5313fe22e4a
--- /dev/null
+++ b/out_tensor/model.layers.11.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bf86bb51499378cc9b26659d83ca1941e1ac2664d7f74cd8e22ab9c5127e16c
+size 2204576
diff --git a/out_tensor/model.layers.11.self_attn.o_proj.safetensors b/out_tensor/model.layers.11.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a29ae6c3f522e17ee1fbed40ac458d73a98b629d
--- /dev/null
+++ b/out_tensor/model.layers.11.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cae492285d3b271f02973f2a311b3c3dcac34cd60982052e753c97b5f90efb62
+size 8766368
diff --git a/out_tensor/model.layers.11.self_attn.q_proj.safetensors b/out_tensor/model.layers.11.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9f514e1da4b0fbbe941f4dbaa9a6357e76882654
--- /dev/null
+++ b/out_tensor/model.layers.11.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68a1a8a91fbcd029ee7e0b321084519b85fe2e7ca26180899573a32430b03198
+size 8766368
diff --git a/out_tensor/model.layers.11.self_attn.v_proj.safetensors b/out_tensor/model.layers.11.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d65eab17dbb195be73733d1dad9c79f20821d5a7
--- /dev/null
+++ b/out_tensor/model.layers.11.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbec78df2323e72c6f25ce089328d757e8d802a1128617c526f59e3e2f43d187
+size 2233632
diff --git a/out_tensor/model.layers.12.mlp.down_proj.safetensors b/out_tensor/model.layers.12.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4cb01012b352c22feb793e55fcfe3df6f4872789
--- /dev/null
+++ b/out_tensor/model.layers.12.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b6c015507a2463975e7752bac34fa9ee1d991c3098860d62dbe01382795e41b
+size 39546016
diff --git a/out_tensor/model.layers.12.mlp.gate_proj.safetensors b/out_tensor/model.layers.12.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f2220515daf6234829dea9048ce59ba508bd093d
--- /dev/null
+++ b/out_tensor/model.layers.12.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f135c797037ce482713c3d4841a94dc7757b70dbe92f2621eb190f12ad3366db
+size 38380832
diff --git a/out_tensor/model.layers.12.mlp.up_proj.safetensors b/out_tensor/model.layers.12.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..21fb32e5e061eda6c0140a3c3a9b143145124d77
--- /dev/null
+++ b/out_tensor/model.layers.12.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0024b549236f4f5373889ff6a926c7bca6e423fb8358973860f10174e5582c1b
+size 39470352
diff --git a/out_tensor/model.layers.12.self_attn.k_proj.safetensors b/out_tensor/model.layers.12.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..85b15d49366b93a156e9cd9b9b90a4d024499685
--- /dev/null
+++ b/out_tensor/model.layers.12.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9164cd4dfb8fbea4534e1704a13725e8f33ceb1f124735964fc729d9703686c
+size 2233632
diff --git a/out_tensor/model.layers.12.self_attn.o_proj.safetensors b/out_tensor/model.layers.12.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7e475d629a22d57e3006bc39166b63263c2531fd
--- /dev/null
+++ b/out_tensor/model.layers.12.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2544d77548e85b34fb784888b15c748251d89489389339ecb87e562201d353f
+size 8881448
diff --git a/out_tensor/model.layers.12.self_attn.q_proj.safetensors b/out_tensor/model.layers.12.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2e8653e3deb6e5f547e7fb5e50141915b6b36d6e
--- /dev/null
+++ b/out_tensor/model.layers.12.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7040498aa8c6ba79c072ff2993ff56fc93fd35f23941ae0597d7652a0fb18e11
+size 8881448
diff --git a/out_tensor/model.layers.12.self_attn.v_proj.safetensors b/out_tensor/model.layers.12.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4b39df01b42de49df76c013d21ce933587c05010
--- /dev/null
+++ b/out_tensor/model.layers.12.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c174bec64fe0a43792b3c73fe110242dcd1efc48f91c4aff2aabc11b04a23713
+size 2704672
diff --git a/out_tensor/model.layers.13.mlp.down_proj.safetensors b/out_tensor/model.layers.13.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dc8d22bec629b6e4732b5332318740b7880663e1
--- /dev/null
+++ b/out_tensor/model.layers.13.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7840f5af1411b097c8307ae52f24e560bd5a10d8ebdbf4679d2341a4052a9519
+size 39546016
diff --git a/out_tensor/model.layers.13.mlp.gate_proj.safetensors b/out_tensor/model.layers.13.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30fe2bb0416be8ea030ea1f735c1686809c73ac6
--- /dev/null
+++ b/out_tensor/model.layers.13.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:112f876c0b23cf88d7fcbd6470f459ad0cf918e1544259133f8bf11bf8e4a97a
+size 38380832
diff --git a/out_tensor/model.layers.13.mlp.up_proj.safetensors b/out_tensor/model.layers.13.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..135b68a53ea037b08a31bb6af64ef928c79fff41
--- /dev/null
+++ b/out_tensor/model.layers.13.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:247993bfad622e03fa0e61afe58c427842742e51e93a7cea260e5c7fc032eab1
+size 39470352
diff --git a/out_tensor/model.layers.13.self_attn.k_proj.safetensors b/out_tensor/model.layers.13.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d42f9bb2fd7f89e4488b2579093afca599e248ce
--- /dev/null
+++ b/out_tensor/model.layers.13.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edf175487b2e592adda5ead825e079d7df2412d7903210cfa7fe21fd0666b27a
+size 3228960
diff --git a/out_tensor/model.layers.13.self_attn.o_proj.safetensors b/out_tensor/model.layers.13.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8db27c5ff068c4eff19b25a683c1968b8700460c
--- /dev/null
+++ b/out_tensor/model.layers.13.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:755ee3c036ae938770cea86defe25bccb0140c0c9f8471e9d3072e864366e689
+size 12862760
diff --git a/out_tensor/model.layers.13.self_attn.q_proj.safetensors b/out_tensor/model.layers.13.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..11296ce217a9463909599b9a0a7b5ccd156874e4
--- /dev/null
+++ b/out_tensor/model.layers.13.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d4039b50105eddbe4295d9a28ad1c65814e5a64cfe5d35fe2f471ea6bf416e8
+size 12862760
diff --git a/out_tensor/model.layers.13.self_attn.v_proj.safetensors b/out_tensor/model.layers.13.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fcfadd79383439d6fd9092c313e4d45de61a7293
--- /dev/null
+++ b/out_tensor/model.layers.13.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e19a2c5b0700ebbf113342bcbad777e12fe6a077f25c7673d2ce586000d3678c
+size 4277536
diff --git a/out_tensor/model.layers.14.mlp.down_proj.safetensors b/out_tensor/model.layers.14.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..40bdf119b4dc82c1916e03a14ad56c80c80e6de9
--- /dev/null
+++ b/out_tensor/model.layers.14.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d1e0e7572518f79067752bdcd89debf243446a5d4b66bd6097003d2af447f7a
+size 39546016
diff --git a/out_tensor/model.layers.14.mlp.gate_proj.safetensors b/out_tensor/model.layers.14.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e2eefeba9b6c3ad8adcad0c6d9d786615304022b
--- /dev/null
+++ b/out_tensor/model.layers.14.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b29c7b38ea5ba17d6480ff88fc31dbbf3e70edbe75678cddf3ec079e569334c1
+size 38380832
diff --git a/out_tensor/model.layers.14.mlp.up_proj.safetensors b/out_tensor/model.layers.14.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bb1ac722f81005aebecb7d2216f9b2db3dd22672
--- /dev/null
+++ b/out_tensor/model.layers.14.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9215a39bab80b2de6192f751a4517bf512d1f6bc67f0e31c2f2e3741451f028f
+size 39470352
diff --git a/out_tensor/model.layers.14.self_attn.k_proj.safetensors b/out_tensor/model.layers.14.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..73c9de643b5dd2ce015ff3a35a7c3a8f991c93b5
--- /dev/null
+++ b/out_tensor/model.layers.14.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09214c6ae7f4d0b7c78876ac0e082d307f4f73b5b0dc2c181e45015eb4c82309
+size 3228960
diff --git a/out_tensor/model.layers.14.self_attn.o_proj.safetensors b/out_tensor/model.layers.14.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6388116000280cb6c56afa9066ed5daaf2f485dd
--- /dev/null
+++ b/out_tensor/model.layers.14.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9c471b728cddd14d2b3634ce1ab6ed29b11d7f730042c0ae03d31d1762777b1
+size 12862760
diff --git a/out_tensor/model.layers.14.self_attn.q_proj.safetensors b/out_tensor/model.layers.14.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49fb3d9b06fae2d21041b3767279765ea2790872
--- /dev/null
+++ b/out_tensor/model.layers.14.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf9d288e4387350010ebbe115855324db3dac2f74adaae490377c9cb2759327a
+size 12862760
diff --git a/out_tensor/model.layers.14.self_attn.v_proj.safetensors b/out_tensor/model.layers.14.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f1d9b8b57899a185f130877b0b6920422fd5903a
--- /dev/null
+++ b/out_tensor/model.layers.14.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d56e08229b47925476a8867c868b85db6c570b2435fdaf8c76dc673229bca8a0
+size 4277536
diff --git a/out_tensor/model.layers.15.mlp.down_proj.safetensors b/out_tensor/model.layers.15.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..21dc9a960af6768e70798f916f394d8222548b1b
--- /dev/null
+++ b/out_tensor/model.layers.15.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f6e73577e912562cab5980a0898cfacfe385e4d0b793056ed35a468ae2af816
+size 39546016
diff --git a/out_tensor/model.layers.15.mlp.gate_proj.safetensors b/out_tensor/model.layers.15.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b97633e07d2f3581ac775f7aea52827e3fcd6d9d
--- /dev/null
+++ b/out_tensor/model.layers.15.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78329108cb8c0597cac4e03e5a110476936ceeb75f2d2ab4f707139f41df9be2
+size 38380832
diff --git a/out_tensor/model.layers.15.mlp.up_proj.safetensors b/out_tensor/model.layers.15.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cd3ad3704d5804364da933ac3f02bbf91fe08985
--- /dev/null
+++ b/out_tensor/model.layers.15.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b76eaac76b8148234b85823ea4f0815a7bffda470da8a5391ae3d1f5ca1f805
+size 39470352
diff --git a/out_tensor/model.layers.15.self_attn.k_proj.safetensors b/out_tensor/model.layers.15.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cabe0c4dcbc29c4a8dbd9428a8ba28e8052aaeaf
--- /dev/null
+++ b/out_tensor/model.layers.15.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd9edcbeca2644820d3d975b6fb4031cb907ff4afec602c9f981c5ba63a15749
+size 3228960
diff --git a/out_tensor/model.layers.15.self_attn.o_proj.safetensors b/out_tensor/model.layers.15.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8068b075d717d95915f968b42b40a1b317ac8e85
--- /dev/null
+++ b/out_tensor/model.layers.15.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e98339c9aa3c4b39a34e68042eaddce214b61e8dcc89d3591389882e5154eb6a
+size 12862760
diff --git a/out_tensor/model.layers.15.self_attn.q_proj.safetensors b/out_tensor/model.layers.15.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4016d6d00c3d6632bd77112b36f3d6fd29417f9b
--- /dev/null
+++ b/out_tensor/model.layers.15.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3d72848edb13e8237b360fbff69f3563d264e4a83b75ba18208d440e7749277
+size 12862760
diff --git a/out_tensor/model.layers.15.self_attn.v_proj.safetensors b/out_tensor/model.layers.15.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6d88bc0f8be7aa1b5a2b55253f18eb1b6a9fb433
--- /dev/null
+++ b/out_tensor/model.layers.15.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2b24989db294eadd8509ff84a3f45db6e0a6d3922f769401c6a60d36097aca6
+size 4277536
diff --git a/out_tensor/model.layers.16.mlp.down_proj.safetensors b/out_tensor/model.layers.16.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae7262c4e69636981d8a0a3f91623d6f1924bea7
--- /dev/null
+++ b/out_tensor/model.layers.16.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82d46d06c837f0cc8d8976f5c92cbcb9d9b555bb2961bd408b97730ce8684ba6
+size 31192364
diff --git a/out_tensor/model.layers.16.mlp.gate_proj.safetensors b/out_tensor/model.layers.16.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..88dc0a68b6062b471409e8e8dcfae1f174cdaf57
--- /dev/null
+++ b/out_tensor/model.layers.16.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df946280c1f77edbabb2efa0ae75bfaf01105d2836155bbd440c5b13fb839458
+size 23700768
diff --git a/out_tensor/model.layers.16.mlp.up_proj.safetensors b/out_tensor/model.layers.16.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ea90112e7e42edfdfbfbbb382cb33b7cf6cffa27
--- /dev/null
+++ b/out_tensor/model.layers.16.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0752a061146e9ac8a66125299859d21e5e1f5bb9d3ef0825bbdd3698fd80acf
+size 24790288
diff --git a/out_tensor/model.layers.16.self_attn.k_proj.safetensors b/out_tensor/model.layers.16.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ead1b8034c096e2d5319e34f6f98e87813d0103d
--- /dev/null
+++ b/out_tensor/model.layers.16.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:924edd95f431a669c84de8522cffe87107ae9e3f7cb45892d26a177dca11ef72
+size 1671904
diff --git a/out_tensor/model.layers.16.self_attn.o_proj.safetensors b/out_tensor/model.layers.16.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9350b9f9310ae34c0809e6faae7ddbaa0683beca
--- /dev/null
+++ b/out_tensor/model.layers.16.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2c1f9135763585a8c491c8d8ccc8fbbbdddaf0c48ea4bc2407ea61a764a9736
+size 6636256
diff --git a/out_tensor/model.layers.16.self_attn.q_proj.safetensors b/out_tensor/model.layers.16.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c1ab79e221a8617df9ed72756c36eeed6bec5402
--- /dev/null
+++ b/out_tensor/model.layers.16.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74e9c20f434eb0fad482995ac68788b27f4ad5bf3e389d391f1c789e86b987fb
+size 6636256
diff --git a/out_tensor/model.layers.16.self_attn.v_proj.safetensors b/out_tensor/model.layers.16.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..09e17b3fde699686c4cdae37fe767798a180fcd8
--- /dev/null
+++ b/out_tensor/model.layers.16.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66dd326c0cf5c748b9071c0880d11d06bd4e2a589826228b4f4ae78178472891
+size 1680288
diff --git a/out_tensor/model.layers.17.mlp.down_proj.safetensors b/out_tensor/model.layers.17.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..796696422825115e5f3f3476a93ab35ffadff728
--- /dev/null
+++ b/out_tensor/model.layers.17.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5002b35e070548fd4ddcdb7addae1da2e1e0c2bc4b8802fc875eaa2ad7906df7
+size 32582816
diff --git a/out_tensor/model.layers.17.mlp.gate_proj.safetensors b/out_tensor/model.layers.17.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..66ab39c3b99fd73f02664f62f31b98c3a4f1cce0
--- /dev/null
+++ b/out_tensor/model.layers.17.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89a0121af10147c6533f382e578cb3bd5bc808256418dbab285a4ab7eb560caf
+size 31040800
diff --git a/out_tensor/model.layers.17.mlp.up_proj.safetensors b/out_tensor/model.layers.17.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..84159ca293097e5b20062f2611d2704116fd19dc
--- /dev/null
+++ b/out_tensor/model.layers.17.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52a591a8d228b32cec2c87e97767aea3de35361d6611c5c0d6dccf1e1ec4fff2
+size 32130320
diff --git a/out_tensor/model.layers.17.self_attn.k_proj.safetensors b/out_tensor/model.layers.17.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..35302b490b3392f1a71601e2e8e4dad00a5364bb
--- /dev/null
+++ b/out_tensor/model.layers.17.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9fd5f077be9b09e259ea791c2a3f1047328c355905347dbb65236c945691853
+size 2233632
diff --git a/out_tensor/model.layers.17.self_attn.o_proj.safetensors b/out_tensor/model.layers.17.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6f4f2faf673367acc7473b50c3cde4dcde191c61
--- /dev/null
+++ b/out_tensor/model.layers.17.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09d1d5d6d7b8d1163aec1745d0fe0df51bd4716295c551beda43bb07c4ad8ce4
+size 8881448
diff --git a/out_tensor/model.layers.17.self_attn.q_proj.safetensors b/out_tensor/model.layers.17.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a0a4e6d535dbe062f6d52f2fdb9b246ba5b95ed4
--- /dev/null
+++ b/out_tensor/model.layers.17.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8eaf139ebe5d14c4ec2b1bdba3c226308acd7de1985ed7edfe1b673fe1d272a
+size 8881448
diff --git a/out_tensor/model.layers.17.self_attn.v_proj.safetensors b/out_tensor/model.layers.17.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b7a435c711b1635d46bc0c44df16c8ee063a6601
--- /dev/null
+++ b/out_tensor/model.layers.17.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:328575c357eec21b3942eb59d5970d97b8f09930389916185f9d1149d0bb77c8
+size 2704672
diff --git a/out_tensor/model.layers.18.mlp.down_proj.safetensors b/out_tensor/model.layers.18.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..43827b032dad478d9ebde8d4082924ad47c437b6
--- /dev/null
+++ b/out_tensor/model.layers.18.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:023529557333337f60b932839b02c3314b3264fbed7c857cbe360f2466389063
+size 39546016
diff --git a/out_tensor/model.layers.18.mlp.gate_proj.safetensors b/out_tensor/model.layers.18.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5fd7d2af497187874db62e65065c6c8de78edfb1
--- /dev/null
+++ b/out_tensor/model.layers.18.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d49f32480de123f291a1b9e03ba668e866195c90c4f67ea954538af190f34b8a
+size 38380832
diff --git a/out_tensor/model.layers.18.mlp.up_proj.safetensors b/out_tensor/model.layers.18.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2885ddd817f8e6660b2dcb2ea5bd448e9dab85cf
--- /dev/null
+++ b/out_tensor/model.layers.18.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6dce4c6711cec58e54a355354480cddae3e3b51f1cf814b7ff758b654676527
+size 39470352
diff --git a/out_tensor/model.layers.18.self_attn.k_proj.safetensors b/out_tensor/model.layers.18.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4ca5c29d8ef6c8e92b0712db997117d06b601a5
--- /dev/null
+++ b/out_tensor/model.layers.18.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:959363356be1083b0360194d36be257d36e157de382c27ae6440072f993dd28e
+size 3228960
diff --git a/out_tensor/model.layers.18.self_attn.o_proj.safetensors b/out_tensor/model.layers.18.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bc4432bc911f79defc2e28816800ccbeacdade82
--- /dev/null
+++ b/out_tensor/model.layers.18.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:197577aeae2b23be715d71b6fe1bd1087b0c0a69341d5417aa0603f0be19ac34
+size 12862760
diff --git a/out_tensor/model.layers.18.self_attn.q_proj.safetensors b/out_tensor/model.layers.18.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ecf94207d8f5f8af252dac3006cfedb592b11237
--- /dev/null
+++ b/out_tensor/model.layers.18.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e90b2728844aa413a765fa368bb4b643dc0aeb125bca61baef393df9e56b5466
+size 12862760
diff --git a/out_tensor/model.layers.18.self_attn.v_proj.safetensors b/out_tensor/model.layers.18.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b53fd5938d25af9327520f036e505e68a9c95b92
--- /dev/null
+++ b/out_tensor/model.layers.18.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c0aa1d18167f99be3bad839f428d0511295a011a7acee76b6941dade6cb52e7
+size 4277536
diff --git a/out_tensor/model.layers.19.mlp.down_proj.safetensors b/out_tensor/model.layers.19.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d5a69545b77e074240bc52b9d3892e109a5d760f
--- /dev/null
+++ b/out_tensor/model.layers.19.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc8cd26a6d1cbfa50eaaaca8d2bd4942e28364a33de930300079f5ad8fb587ba
+size 39546016
diff --git a/out_tensor/model.layers.19.mlp.gate_proj.safetensors b/out_tensor/model.layers.19.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c10bcd6603d3d68b1a674650ef002f3bdd492017
--- /dev/null
+++ b/out_tensor/model.layers.19.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dc2efdb38e8d0fe9003ffc16e9ce1cb3b50acf1a5882d1b1231019883808e84
+size 38380832
diff --git a/out_tensor/model.layers.19.mlp.up_proj.safetensors b/out_tensor/model.layers.19.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d044652256fcb4920a5aca5dd4b28558c9c07b3d
--- /dev/null
+++ b/out_tensor/model.layers.19.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c7772d7846cbfa22be4f30bb2ed273496ec2c7740552df82f8ec5a6779843ab
+size 39470352
diff --git a/out_tensor/model.layers.19.self_attn.k_proj.safetensors b/out_tensor/model.layers.19.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c96f7ea8388db97cfbd0d8343bfe62cd4ac40e33
--- /dev/null
+++ b/out_tensor/model.layers.19.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:998f01f9ad86a9a7258c0e15a4baff78594316fe4c2c6dcdcc28cbed6f9fac66
+size 3228960
diff --git a/out_tensor/model.layers.19.self_attn.o_proj.safetensors b/out_tensor/model.layers.19.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d6325af2b5160c18b21fddeca31b10ebb014a096
--- /dev/null
+++ b/out_tensor/model.layers.19.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5775624f92b2fbaf8c06c28e1863a9fec7f93097d7849bd37549cd059a2820f
+size 12862760
diff --git a/out_tensor/model.layers.19.self_attn.q_proj.safetensors b/out_tensor/model.layers.19.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b3e2263601cb31e0b954d60233554bdbb75dc16c
--- /dev/null
+++ b/out_tensor/model.layers.19.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47835f302b43416c062f906e0b132e5af77df26691fed4692605dcef7d14f38e
+size 12862760
diff --git a/out_tensor/model.layers.19.self_attn.v_proj.safetensors b/out_tensor/model.layers.19.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..592aa30ac60009dc451175d9948ec00073299df6
--- /dev/null
+++ b/out_tensor/model.layers.19.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7efaacb2d49e10580ad20ede0905f55db9f3eea9cbfe029dde9cedca6cb06926
+size 4277536
diff --git a/out_tensor/model.layers.2.mlp.down_proj.safetensors b/out_tensor/model.layers.2.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2fdfa12a7f91c767729bcf2811f7657feee5e51a
--- /dev/null
+++ b/out_tensor/model.layers.2.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e5bc29b14a1162ad56ae30dc47d0c5c41d620c2b102585cb4a4783f2b5a0f2b
+size 25619608
diff --git a/out_tensor/model.layers.2.mlp.gate_proj.safetensors b/out_tensor/model.layers.2.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6a7f301a8456ece156e313fe5f424731c8dc0df3
--- /dev/null
+++ b/out_tensor/model.layers.2.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c91a48d78012fd8d5dcf08103cb9d6af093c09929d45dab18848decb05ab3dd
+size 23700760
diff --git a/out_tensor/model.layers.2.mlp.up_proj.safetensors b/out_tensor/model.layers.2.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7c153e6f7963755597e43b27690b4edc17c25d1f
--- /dev/null
+++ b/out_tensor/model.layers.2.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8de8326b34cb3a650998498d6a3151d89fc7ca1c659f9a6f84805341f803b9d2
+size 24790288
diff --git a/out_tensor/model.layers.2.self_attn.k_proj.safetensors b/out_tensor/model.layers.2.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..09a1a95db7455cb7b593ec72d8ad422bfccc3bb9
--- /dev/null
+++ b/out_tensor/model.layers.2.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f8e1fb3d9b58f0aed5dec4ce74bb19f10ed27c9d12a49f589cf354b38a3735a
+size 2233632
diff --git a/out_tensor/model.layers.2.self_attn.o_proj.safetensors b/out_tensor/model.layers.2.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..da857057fba78390b8298ef7ce20473bc35e156c
--- /dev/null
+++ b/out_tensor/model.layers.2.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d474a0a1e03493915c67f0935a995b19acd657bcc8739c91ff045596c94654fe
+size 8881440
diff --git a/out_tensor/model.layers.2.self_attn.q_proj.safetensors b/out_tensor/model.layers.2.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dbf58e0896c33d0217c5d8cca89223f16fbc7bf8
--- /dev/null
+++ b/out_tensor/model.layers.2.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abe9b67219477b6a9d0a06ae5c467fd14906e73f0a7892f4b743f01a209aaba1
+size 8881440
diff --git a/out_tensor/model.layers.2.self_attn.v_proj.safetensors b/out_tensor/model.layers.2.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7dfba17ab060f4b6ad22930eca5b3a845916ef85
--- /dev/null
+++ b/out_tensor/model.layers.2.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61ef0e9754e0233246821b28caf4927e222f6e78a3deb4ed29f69c570daa1e4e
+size 2704672
diff --git a/out_tensor/model.layers.20.mlp.down_proj.safetensors b/out_tensor/model.layers.20.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f6fe71d9fb30067021876f97cce5e841290ca75c
--- /dev/null
+++ b/out_tensor/model.layers.20.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b59699e8acef89823f349be454b1ec879d5cbe121ba843c914a731adfda378d
+size 39546016
diff --git a/out_tensor/model.layers.20.mlp.gate_proj.safetensors b/out_tensor/model.layers.20.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..25c688ab0d564485e618b69c21c562dec4a86c4e
--- /dev/null
+++ b/out_tensor/model.layers.20.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:791f0ec59826dde7a929e686cdb080ed61ed3ae1ce9ab4b89207bc75a6b908f1
+size 38380832
diff --git a/out_tensor/model.layers.20.mlp.up_proj.safetensors b/out_tensor/model.layers.20.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..01720bc98efa7f24c092407ec5dde78c8f0ccfaf
--- /dev/null
+++ b/out_tensor/model.layers.20.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1332790d6b6e86650f75bb5d857b73c261483bbdd4953d617eab69bee9a8214
+size 39470352
diff --git a/out_tensor/model.layers.20.self_attn.k_proj.safetensors b/out_tensor/model.layers.20.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2f1c8d6a1a1a4477e719dad4ef91348c1e3bbe04
--- /dev/null
+++ b/out_tensor/model.layers.20.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d9b0a11fe7af3c98c4750936b2188ccb2bee95f9218bbbcfdd9912a22560f4b
+size 3228960
diff --git a/out_tensor/model.layers.20.self_attn.o_proj.safetensors b/out_tensor/model.layers.20.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..981b52f52176573ae2cfd50eaf3f0611d9430174
--- /dev/null
+++ b/out_tensor/model.layers.20.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:000c406f02fcba39631e850afd949e862942471c9dc52a510ab6820179bf5605
+size 12862760
diff --git a/out_tensor/model.layers.20.self_attn.q_proj.safetensors b/out_tensor/model.layers.20.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..68515da369991d36c8cfba9627947b0725de07e3
--- /dev/null
+++ b/out_tensor/model.layers.20.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb6dc9e758990bcb5d7ffe21eaed4aa4c25bcc5ec6adb56346e6e8b930439292
+size 12862760
diff --git a/out_tensor/model.layers.20.self_attn.v_proj.safetensors b/out_tensor/model.layers.20.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a38f72c8dcc940e3169bf40d35256ceccbbece09
--- /dev/null
+++ b/out_tensor/model.layers.20.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f804325de2daf304d8cb75c870b7cb60473a1e0fc29964292cd5c3e34bd28679
+size 4277536
diff --git a/out_tensor/model.layers.21.mlp.down_proj.safetensors b/out_tensor/model.layers.21.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..96077469cdc12bef159869c91bb98881dddf60e8
--- /dev/null
+++ b/out_tensor/model.layers.21.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:448aa6156bef2f8b305e5e3c648edc64f1f5c88d83e2ab05f9836ff2daa97db6
+size 39546016
diff --git a/out_tensor/model.layers.21.mlp.gate_proj.safetensors b/out_tensor/model.layers.21.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..098e5ade54f85f5a940a33074dcceb8f3843ed70
--- /dev/null
+++ b/out_tensor/model.layers.21.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6afb604476bc9192af340bed3edd46ef0bd7a9a460b212671258684552d0e83
+size 38380832
diff --git a/out_tensor/model.layers.21.mlp.up_proj.safetensors b/out_tensor/model.layers.21.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b658641dc8e1f10c79f47dbe34ba877a21d3cdbd
--- /dev/null
+++ b/out_tensor/model.layers.21.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e66369b9a0af942c5cf371c392ffa656358b8bae66f53895a42e8f5c4c3ebb5a
+size 39470352
diff --git a/out_tensor/model.layers.21.self_attn.k_proj.safetensors b/out_tensor/model.layers.21.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b2c0d0f53eca4cfc5268d82accb8aea1f7d8fb5f
--- /dev/null
+++ b/out_tensor/model.layers.21.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1be8105f8ad3beb4f394eb3cde144ae702d64b4eb53cecc95ff852c7b86bb5e
+size 3228960
diff --git a/out_tensor/model.layers.21.self_attn.o_proj.safetensors b/out_tensor/model.layers.21.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..65d6daba19372b0b42e9d312a18f888c82351c4b
--- /dev/null
+++ b/out_tensor/model.layers.21.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7215f7a7fe8c51fc6d1fc023cbf7e60b38e8b73afb907bc8df10a32519a985ac
+size 12862760
diff --git a/out_tensor/model.layers.21.self_attn.q_proj.safetensors b/out_tensor/model.layers.21.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b4d6dd97f051a8a5ef7f800f9898b44bead13d6c
--- /dev/null
+++ b/out_tensor/model.layers.21.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0396f6542836b0f409930e04c9c0a70c96e0d289923d8002711bd7940b0a30b2
+size 12862760
diff --git a/out_tensor/model.layers.21.self_attn.v_proj.safetensors b/out_tensor/model.layers.21.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4302b9563a314560fc54e543ee422aeabf207621
--- /dev/null
+++ b/out_tensor/model.layers.21.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3bc88c657ee95475436a10b45af58f9356cec5cda5378973784ec9415013418
+size 4277536
diff --git a/out_tensor/model.layers.22.mlp.down_proj.safetensors b/out_tensor/model.layers.22.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e570edd09a1424dd1a5fcd3b7f8ea927ce97fae8
--- /dev/null
+++ b/out_tensor/model.layers.22.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1e086d6ffcf5fdca7c61c244c61a20e70dd481ed7af2a4c23bc30de2b43fd57
+size 32582816
diff --git a/out_tensor/model.layers.22.mlp.gate_proj.safetensors b/out_tensor/model.layers.22.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..35b2d5ace0d0c20b67d173b03e37c039222783e1
--- /dev/null
+++ b/out_tensor/model.layers.22.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd45946e09c87e557fb126dfc28200bff4f6d437d0648a8b506703d0ef6cc987
+size 31040800
diff --git a/out_tensor/model.layers.22.mlp.up_proj.safetensors b/out_tensor/model.layers.22.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fec3700510bf0046ba278e256495c10a67165016
--- /dev/null
+++ b/out_tensor/model.layers.22.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44dc44e7ce57f2ae768d487d42bffa238955c00afe8400db97814d20d9c67e7
+size 32130320
diff --git a/out_tensor/model.layers.22.self_attn.k_proj.safetensors b/out_tensor/model.layers.22.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4e3b06c327d3db636f4a780bde5e4992941498a
--- /dev/null
+++ b/out_tensor/model.layers.22.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15c3927f76deb13f9af21ada2c57a2afb90985ce2eaf54bbfe333ffebc2444f2
+size 2204576
diff --git a/out_tensor/model.layers.22.self_attn.o_proj.safetensors b/out_tensor/model.layers.22.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f95cd1db905ca22224db3db24b738abcf43ab171
--- /dev/null
+++ b/out_tensor/model.layers.22.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:047891680127c4769a84abaf7f0574986b1874f40df3802c9a0ad1fbdae0804a
+size 8766368
diff --git a/out_tensor/model.layers.22.self_attn.q_proj.safetensors b/out_tensor/model.layers.22.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..78dc6989ad5b37f0ae0dd5e54cfb07591dc0a7c1
--- /dev/null
+++ b/out_tensor/model.layers.22.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca7e1991fe1fe1995b3b5807f17bcf068f0db24034341d4cf9022ffbda163d9f
+size 8766368
diff --git a/out_tensor/model.layers.22.self_attn.v_proj.safetensors b/out_tensor/model.layers.22.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..671d61123c6cadac5601103c1cd64521ae3db2fc
--- /dev/null
+++ b/out_tensor/model.layers.22.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9af9e0ccebbe3e984ba9a693da9d2289ec62b3de5daad4515a292f668a3f1add
+size 2671520
diff --git a/out_tensor/model.layers.23.mlp.down_proj.safetensors b/out_tensor/model.layers.23.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..577ea0d470c852b3da0b76e876ee9df07a70d4ed
--- /dev/null
+++ b/out_tensor/model.layers.23.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c7db2895183d50200acaa3e5aca129f982656dd8bfb6001d8ca6465da77e63a
+size 32582816
diff --git a/out_tensor/model.layers.23.mlp.gate_proj.safetensors b/out_tensor/model.layers.23.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8776c1b2200347ff4daaef799ff96ddecbe1951c
--- /dev/null
+++ b/out_tensor/model.layers.23.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f64bba873869203e70475585c9b2f2c3894fdbf32b29f4ee35bab7b4e20ef27f
+size 31040800
diff --git a/out_tensor/model.layers.23.mlp.up_proj.safetensors b/out_tensor/model.layers.23.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5dc031cbeaf10f78440d4746a62d0013e8993b93
--- /dev/null
+++ b/out_tensor/model.layers.23.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab71003853ece6596f54507cc7106247c2fa5fa2cf6d0b29a42d5103633e80df
+size 32130320
diff --git a/out_tensor/model.layers.23.self_attn.k_proj.safetensors b/out_tensor/model.layers.23.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..35e4d23406da12ac2596cad434f9398c6e5e3535
--- /dev/null
+++ b/out_tensor/model.layers.23.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:134df01c756cd34036ffaa38b202039724b1bd1e310ae5a7f5901a84d0c31c23
+size 2204576
diff --git a/out_tensor/model.layers.23.self_attn.o_proj.safetensors b/out_tensor/model.layers.23.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8a776902bbf5fee2326c361d038cebb32ad297eb
--- /dev/null
+++ b/out_tensor/model.layers.23.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9cee82e633736d036875d2fafa435ab59065e336d29e713a94f40be64f2ce81
+size 8766368
diff --git a/out_tensor/model.layers.23.self_attn.q_proj.safetensors b/out_tensor/model.layers.23.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..18f1e19ec0eb388d086d75da641bbfdd39b9a320
--- /dev/null
+++ b/out_tensor/model.layers.23.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a92cc0e88bb22120e1ed80987b0179de00d9789855342b4c06576c55cad58df0
+size 8766368
diff --git a/out_tensor/model.layers.23.self_attn.v_proj.safetensors b/out_tensor/model.layers.23.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..73f3029ba774fd7cb0ee9f863e3e957549b2f265
--- /dev/null
+++ b/out_tensor/model.layers.23.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8e1a539118d3f92c7e2851afd96033b84a019e512f3fbc91784841d8eff6599
+size 2671520
diff --git a/out_tensor/model.layers.24.mlp.down_proj.safetensors b/out_tensor/model.layers.24.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..95050cc43bd2c5663a5b3356df883e00ab22bcf2
--- /dev/null
+++ b/out_tensor/model.layers.24.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b754dfa6da88434e097441c556f38c4f100930ebb253e4e80a7190cac35cff5e
+size 45118764
diff --git a/out_tensor/model.layers.24.mlp.gate_proj.safetensors b/out_tensor/model.layers.24.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8d7c347e1fc69f75d6dae49960709521d7314ef8
--- /dev/null
+++ b/out_tensor/model.layers.24.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac94a8f1ff2304b2106680d190d03126ecb4095a092e6245a608804c36be0f1e
+size 44286680
diff --git a/out_tensor/model.layers.24.mlp.up_proj.safetensors b/out_tensor/model.layers.24.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..afeed18d4130778651b2684172c3313d51bfe5df
--- /dev/null
+++ b/out_tensor/model.layers.24.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35594b01fd84865c2a253ee7db1e82677e05cea2f4d25ef7ad6923f122cd580f
+size 44286672
diff --git a/out_tensor/model.layers.24.self_attn.k_proj.safetensors b/out_tensor/model.layers.24.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bd91389907a866f2b0ccacb951ba1ef02d406034
--- /dev/null
+++ b/out_tensor/model.layers.24.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:667c36026501d24e1d381bb678da945952ea5e2a5c65e8c465a126cec4df3fe0
+size 3228960
diff --git a/out_tensor/model.layers.24.self_attn.o_proj.safetensors b/out_tensor/model.layers.24.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2e6fe143e4f99c50c5bf5c571c14d64a4b9688d3
--- /dev/null
+++ b/out_tensor/model.layers.24.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb0ea36f56cb1ff84c25e6cc36cc85e045da5914585f6f25fa3472ebe8463bf
+size 12862760
diff --git a/out_tensor/model.layers.24.self_attn.q_proj.safetensors b/out_tensor/model.layers.24.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30b244d572d1bc8cf01d68ffd37e2b63403db369
--- /dev/null
+++ b/out_tensor/model.layers.24.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95102d8d96841afe35a4b21f5fefb196cf3430769e0c4beffc86c8ca7445124e
+size 12862760
diff --git a/out_tensor/model.layers.24.self_attn.v_proj.safetensors b/out_tensor/model.layers.24.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..73eb0dff7e367fce47cd8d28bcde33c7cc3a4c32
--- /dev/null
+++ b/out_tensor/model.layers.24.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9de496d897049db91fbb39a67595efeadaf1609fd975be4bbffb048490a18d6
+size 4277536
diff --git a/out_tensor/model.layers.25.mlp.down_proj.safetensors b/out_tensor/model.layers.25.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6ae31f5afebee35ac5bceb5dd680caba56f4c33f
--- /dev/null
+++ b/out_tensor/model.layers.25.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c82446cf242236f066280157d5871ee2228a478b3b130f6a7eb12ed31c3800d2
+size 45118764
diff --git a/out_tensor/model.layers.25.mlp.gate_proj.safetensors b/out_tensor/model.layers.25.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5ffc1b5a99d732f54d1a756b8e8f05994636a947
--- /dev/null
+++ b/out_tensor/model.layers.25.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0613de9ef5e29c49c9c31634396cb2fe4c5cb4b25b62141074ee1aee4d04ed82
+size 44286680
diff --git a/out_tensor/model.layers.25.mlp.up_proj.safetensors b/out_tensor/model.layers.25.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..37a35057c3e81e957a1a95b3ad807d3fc7e73899
--- /dev/null
+++ b/out_tensor/model.layers.25.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42c8bad274956958fef7eea9ee6cb3ab29aa8868132a4b31532e34a36f10bc33
+size 44286672
diff --git a/out_tensor/model.layers.25.self_attn.k_proj.safetensors b/out_tensor/model.layers.25.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9a1278a6806d5a5fdf00697a6b72a1f0038b2d30
--- /dev/null
+++ b/out_tensor/model.layers.25.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49128c3b7a2ccbaa82822cbaf02d6308242544ee5f0d496fbd5ed4912c05df73
+size 3228960
diff --git a/out_tensor/model.layers.25.self_attn.o_proj.safetensors b/out_tensor/model.layers.25.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..feae4ac885ea38d7ee17d259f0fc61fb4da84566
--- /dev/null
+++ b/out_tensor/model.layers.25.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cb73c6b6f2119169ff0031c5ac8dd18014c12f69390554890e06fa647e4c9e8
+size 12862760
diff --git a/out_tensor/model.layers.25.self_attn.q_proj.safetensors b/out_tensor/model.layers.25.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..458e8eff016279a4e9a51fcab16e642063253d96
--- /dev/null
+++ b/out_tensor/model.layers.25.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07358b264cf19f3e8bc5d835f79ef04926aa25d874685eb326b9e72636d494c9
+size 12862760
diff --git a/out_tensor/model.layers.25.self_attn.v_proj.safetensors b/out_tensor/model.layers.25.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a65711f6470f6cad5ba758333796257fcdda84c6
--- /dev/null
+++ b/out_tensor/model.layers.25.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce7d759b77d4a547c41fb6a08a0004874930013f0c076f3740a9c399df0501ba
+size 4277536
diff --git a/out_tensor/model.layers.26.mlp.down_proj.safetensors b/out_tensor/model.layers.26.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1eb712674223325b7e5b4d950ef9fc3022528df8
--- /dev/null
+++ b/out_tensor/model.layers.26.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:235085470b98faae52beca7e85b2bbea3956090ec836f65c613fcc727e56b614
+size 45118764
diff --git a/out_tensor/model.layers.26.mlp.gate_proj.safetensors b/out_tensor/model.layers.26.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7a88b8d2f372303a15a3cf9f19e13a6bfd13ffb1
--- /dev/null
+++ b/out_tensor/model.layers.26.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aca7fc44990d793297254d8cb43e9ef3427f23bdd45fc04d5e1790eee2007a1
+size 44286680
diff --git a/out_tensor/model.layers.26.mlp.up_proj.safetensors b/out_tensor/model.layers.26.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b351e3a69f486bff0b68864e2aee8b78549120d8
--- /dev/null
+++ b/out_tensor/model.layers.26.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdb37629caccb5f8b5823a458f739ab93856c87f0227e97dcd77893a4259812c
+size 44286672
diff --git a/out_tensor/model.layers.26.self_attn.k_proj.safetensors b/out_tensor/model.layers.26.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d7b886fd206df9100197d9e62952c3ac60633d2c
--- /dev/null
+++ b/out_tensor/model.layers.26.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35f85630fbb1c9d5be6fa046294de3190f46b66a4fbdc1ae72550ce1793cf473
+size 3228960
diff --git a/out_tensor/model.layers.26.self_attn.o_proj.safetensors b/out_tensor/model.layers.26.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a4a9f20fb1716b2aee84585cfd358a16342fdfc3
--- /dev/null
+++ b/out_tensor/model.layers.26.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55ca5159528a1e3681e16fa1fa6cba2ef681791f0a830b97f82924c57cfdc041
+size 12862760
diff --git a/out_tensor/model.layers.26.self_attn.q_proj.safetensors b/out_tensor/model.layers.26.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..44430ee2de3a5065b6c23cc685e64137a8b2e297
--- /dev/null
+++ b/out_tensor/model.layers.26.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c630191e8d5c852d3d67a11b20f9559501e2f8ebc1a4bc3fc494f36cfa81bee
+size 12862760
diff --git a/out_tensor/model.layers.26.self_attn.v_proj.safetensors b/out_tensor/model.layers.26.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..737e0527ea3da8c525be53e431415afe0c0fe318
--- /dev/null
+++ b/out_tensor/model.layers.26.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8a13dd10d5cdebe6d758a60a5dd2fadd1428ade29c961e3e085c055e2dbe87f
+size 4277536
diff --git a/out_tensor/model.layers.27.mlp.down_proj.safetensors b/out_tensor/model.layers.27.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e39f12b126990527022534eb2c5c2d39ec184267
--- /dev/null
+++ b/out_tensor/model.layers.27.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0907406575f636fa059a9824d6fd66244eafdc221a6d4be85cd1ef6a1712579d
+size 45118764
diff --git a/out_tensor/model.layers.27.mlp.gate_proj.safetensors b/out_tensor/model.layers.27.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8ed729b2021e6dff4169157b2fe7a07c0cc06151
--- /dev/null
+++ b/out_tensor/model.layers.27.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60ac3e0611ab964b94bfaa39670ce8e11a65e5363fde3c068b719eb997743391
+size 44286680
diff --git a/out_tensor/model.layers.27.mlp.up_proj.safetensors b/out_tensor/model.layers.27.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b5e984cdf70516fda727c6c79c00dedcdc6d9adf
--- /dev/null
+++ b/out_tensor/model.layers.27.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3aca2affa0002883a6e1717af8bd009fefa35e2eb5eda2431edaf46df631cc5e
+size 44286672
diff --git a/out_tensor/model.layers.27.self_attn.k_proj.safetensors b/out_tensor/model.layers.27.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f4150ee9be42d747b0beb85edbb84a58d06d9ee1
--- /dev/null
+++ b/out_tensor/model.layers.27.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6733ac144f2c62afd2b10dd9ef6b1a600e2357fd1575406aba0e00dcfcf292a1
+size 3228960
diff --git a/out_tensor/model.layers.27.self_attn.o_proj.safetensors b/out_tensor/model.layers.27.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1c999f7e4b82e1344166b767d0b2b639f19e7a2f
--- /dev/null
+++ b/out_tensor/model.layers.27.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36f1a2cccd0db5c434e12c592a3d784a8be22c93539a116e1e45230d650c868b
+size 12862760
diff --git a/out_tensor/model.layers.27.self_attn.q_proj.safetensors b/out_tensor/model.layers.27.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5a7c1732e531c242ec11f6c9edeb0f7781ab955a
--- /dev/null
+++ b/out_tensor/model.layers.27.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e36a5d27a98d09492ab1b2986aadaa733ac5f1539deb654980db73549a97ca8b
+size 12862760
diff --git a/out_tensor/model.layers.27.self_attn.v_proj.safetensors b/out_tensor/model.layers.27.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d921a1cbee466074f3ee5604b8c921304f6a2864
--- /dev/null
+++ b/out_tensor/model.layers.27.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d77682f3daf64ee11dca98a30ee436b952fac6c7e81a06354aea87adde3a6fb4
+size 4277536
diff --git a/out_tensor/model.layers.28.mlp.down_proj.safetensors b/out_tensor/model.layers.28.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2c462441fb1fa235d71c05fc33ce6de863c3eb27
--- /dev/null
+++ b/out_tensor/model.layers.28.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fb4ae32e59103b525cd8171d8ad80849aff675da1c0810beb47d4684fce7ec0
+size 32582816
diff --git a/out_tensor/model.layers.28.mlp.gate_proj.safetensors b/out_tensor/model.layers.28.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3f47fb92ec0d12cc2776474d87d55b0af6235cb4
--- /dev/null
+++ b/out_tensor/model.layers.28.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54a463cd78588af38075f6c0901e17d63ec999519d96fc9bbd3cecd1911a4620
+size 31040800
diff --git a/out_tensor/model.layers.28.mlp.up_proj.safetensors b/out_tensor/model.layers.28.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dabb86b643f28d873753ec12ec31f1b1eb4b31e2
--- /dev/null
+++ b/out_tensor/model.layers.28.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c433bb70d636a1ea9651ae8049fa251e00a230b8cbb250d3ae9d9f3ec183e02
+size 32130320
diff --git a/out_tensor/model.layers.28.self_attn.k_proj.safetensors b/out_tensor/model.layers.28.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..543cee33db1eca85f1631ad991db72c2f72e763b
--- /dev/null
+++ b/out_tensor/model.layers.28.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ec88229a9bed9431d0acd6ece5f24bb8c3a2bc8d72d9eadaf372738e25a9351
+size 2204576
diff --git a/out_tensor/model.layers.28.self_attn.o_proj.safetensors b/out_tensor/model.layers.28.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e93c48ca1a04bec16da08de9d443ee817e72fdc9
--- /dev/null
+++ b/out_tensor/model.layers.28.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0dd4858fbbb7a647a8f0fd763f985681aaa0d3d56fdc5972979314f5b282645
+size 8766368
diff --git a/out_tensor/model.layers.28.self_attn.q_proj.safetensors b/out_tensor/model.layers.28.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f0abdba343c874a56cac36d7fdc20c29430c2a3d
--- /dev/null
+++ b/out_tensor/model.layers.28.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2edad1aa90aaaf7531479696a8e018ac7cd6122be380da7a4ce0703f9fbfbde0
+size 8766368
diff --git a/out_tensor/model.layers.28.self_attn.v_proj.safetensors b/out_tensor/model.layers.28.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c77ae451ca4f630fa19e5f40b5bfc67dffac970e
--- /dev/null
+++ b/out_tensor/model.layers.28.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e550f4ba611e273672328d1a001cca5aa6d2526aa41245dcbc1f33590914dea9
+size 2233632
diff --git a/out_tensor/model.layers.29.mlp.down_proj.safetensors b/out_tensor/model.layers.29.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8723543e66d9ee2a7054006ca9241c1a23dcb433
--- /dev/null
+++ b/out_tensor/model.layers.29.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f76d1ced9662819a2f4c59116e763c77385af418ddcdda8c8204b928bcf7344c
+size 32582816
diff --git a/out_tensor/model.layers.29.mlp.gate_proj.safetensors b/out_tensor/model.layers.29.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..188129dd3ab285ec08608d3900f3e996dfb980d9
--- /dev/null
+++ b/out_tensor/model.layers.29.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9a539759c39146d2865a6c1552e68931d8fde7b580e1d0784c6db6447a3658d
+size 31040800
diff --git a/out_tensor/model.layers.29.mlp.up_proj.safetensors b/out_tensor/model.layers.29.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2131e12e7fd9139ce3765a5d1080cde72ee12718
--- /dev/null
+++ b/out_tensor/model.layers.29.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:473a3fe41bee4f62017612f4588b77b61d1c2962beb7e72385242c02aefe603a
+size 32130320
diff --git a/out_tensor/model.layers.29.self_attn.k_proj.safetensors b/out_tensor/model.layers.29.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bfff5d12d5e2792a9048f6721d35d8b687f49535
--- /dev/null
+++ b/out_tensor/model.layers.29.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58cb66e3e9d64c39a1d43d41289c82b2e94e485d2d78e359c9932efdd1f3ef45
+size 2204576
diff --git a/out_tensor/model.layers.29.self_attn.o_proj.safetensors b/out_tensor/model.layers.29.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6d8554169fffe9a6dd27145a343ccebdc2fb85ed
--- /dev/null
+++ b/out_tensor/model.layers.29.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fd42f9f85210e7a69aba7a7d918a7eee93542b0ee83a66aa7aefbf18b079755
+size 8766368
diff --git a/out_tensor/model.layers.29.self_attn.q_proj.safetensors b/out_tensor/model.layers.29.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..193809debf903f014855f466f07cca2ec9b5ef69
--- /dev/null
+++ b/out_tensor/model.layers.29.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c386617313bf35efd45d4a397dc8300413f632e12a9d3fe75543f801fab1756
+size 8766368
diff --git a/out_tensor/model.layers.29.self_attn.v_proj.safetensors b/out_tensor/model.layers.29.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c7f2a104fd42510d101c3ea38b74dbb944473d0e
--- /dev/null
+++ b/out_tensor/model.layers.29.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d80c66fd4daa6cd48771d0eaeef5d76dede5d28a314eb66aa936e013a2f1dfa
+size 2233632
diff --git a/out_tensor/model.layers.3.mlp.down_proj.safetensors b/out_tensor/model.layers.3.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c53593491278c9b0562488d5bd7b91e66dd7d187
--- /dev/null
+++ b/out_tensor/model.layers.3.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9373c0908ede416462b07f110cb63a6e050bcedf36a35878d6a9fb1f1eb64bfd
+size 32582808
diff --git a/out_tensor/model.layers.3.mlp.gate_proj.safetensors b/out_tensor/model.layers.3.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fc44032dcd0c66435815d624affc9ace6454ed33
--- /dev/null
+++ b/out_tensor/model.layers.3.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb7017472498a4cd4df2b82d93baa84eff5279b069df76520eba38e8abf11f41
+size 31040792
diff --git a/out_tensor/model.layers.3.mlp.up_proj.safetensors b/out_tensor/model.layers.3.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e78c603fc480fea02b01b89c1890f18838e950c1
--- /dev/null
+++ b/out_tensor/model.layers.3.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc440af571bfdacbee32f5fc85080dee38adcf410205290c0d08ad29af3bfdae
+size 32130320
diff --git a/out_tensor/model.layers.3.self_attn.k_proj.safetensors b/out_tensor/model.layers.3.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b5d080b318b0e248dfde82eef3d66d6626d042b
--- /dev/null
+++ b/out_tensor/model.layers.3.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1408dde17a7d58adff494f81f9935d8747f55d06a31ec15eae8a65881778b3cf
+size 2233632
diff --git a/out_tensor/model.layers.3.self_attn.o_proj.safetensors b/out_tensor/model.layers.3.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d54abbb68fd4e2601316d6564bde6350cbdaebc5
--- /dev/null
+++ b/out_tensor/model.layers.3.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abd056ecdc332245f701be1b834c61f03019fa68eff968cadcb4f1ad888624ca
+size 8881440
diff --git a/out_tensor/model.layers.3.self_attn.q_proj.safetensors b/out_tensor/model.layers.3.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9849ced77814edf24f8a14fe904fc29431ab3db0
--- /dev/null
+++ b/out_tensor/model.layers.3.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a054b57e57bf1c7f830699d6c9c142097a707bf495a9109a2ed77c066db79cd1
+size 8881440
diff --git a/out_tensor/model.layers.3.self_attn.v_proj.safetensors b/out_tensor/model.layers.3.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..69378277c8b322b4668e49700f2c416b35cb18fc
--- /dev/null
+++ b/out_tensor/model.layers.3.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:622f21d434e9f8224ca4145494264048d7179512274ba64063ec990d13223b8c
+size 2704672
diff --git a/out_tensor/model.layers.30.mlp.down_proj.safetensors b/out_tensor/model.layers.30.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cd674540ad67934c2b42ff869bb23bf6b752184a
--- /dev/null
+++ b/out_tensor/model.layers.30.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bba60fcc17e98f667c9089e61e0f4bc114f173d69625990c36e6c03257689c6f
+size 45118764
diff --git a/out_tensor/model.layers.30.mlp.gate_proj.safetensors b/out_tensor/model.layers.30.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..eb4d5084a55ec172617e3c8aafcc7c10ea99a15d
--- /dev/null
+++ b/out_tensor/model.layers.30.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e31b4a1e4c097f771381b7f2f203935cc9b6ea76c1c6b17356e3758796695e8
+size 44286680
diff --git a/out_tensor/model.layers.30.mlp.up_proj.safetensors b/out_tensor/model.layers.30.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7a2f19ee0bf87b7360446dc4503ac3e12f7600c4
--- /dev/null
+++ b/out_tensor/model.layers.30.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:025fea7b8b2bba50b1fe02622f27be8d0beb25c2e92b680117d56dd9dc591179
+size 44286672
diff --git a/out_tensor/model.layers.30.self_attn.k_proj.safetensors b/out_tensor/model.layers.30.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cf78d5b527003c91bb439c6651b18aac3f414adb
--- /dev/null
+++ b/out_tensor/model.layers.30.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7db8962ec5b0eaca9c61614c25a2761d5b3c4f537736a1022e3e5b428876881f
+size 2757920
diff --git a/out_tensor/model.layers.30.self_attn.o_proj.safetensors b/out_tensor/model.layers.30.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c4eaa7aeb600586d99a0adc30862d5557782df5e
--- /dev/null
+++ b/out_tensor/model.layers.30.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:947c993ee5ff41034ff3b49557b4bc1eb7b4da18929593dbf926e5731527ab9f
+size 10978600
diff --git a/out_tensor/model.layers.30.self_attn.q_proj.safetensors b/out_tensor/model.layers.30.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1e23148ff83d05c301bb7eeb2f7b7333efb0a74d
--- /dev/null
+++ b/out_tensor/model.layers.30.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de469e49442cab16302548020f90714cddeacf0f3231580fc79508937a2d5683
+size 10978600
diff --git a/out_tensor/model.layers.30.self_attn.v_proj.safetensors b/out_tensor/model.layers.30.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fd003678d6c4d10669ca61ba7e0de627e75e2a17
--- /dev/null
+++ b/out_tensor/model.layers.30.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da7e1890c8e3fdab473e575be5ce18ca8393b38b604038aeeeda6fd08bb2cce9
+size 3228960
diff --git a/out_tensor/model.layers.31.mlp.down_proj.safetensors b/out_tensor/model.layers.31.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bca68bc4aab8d272532200c7d98ad796dbe46ac3
--- /dev/null
+++ b/out_tensor/model.layers.31.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:162515970d212aa2e6bed82bdbc226c29e759b78218ea0420a87c52365b3c056
+size 45118764
diff --git a/out_tensor/model.layers.31.mlp.gate_proj.safetensors b/out_tensor/model.layers.31.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..de552fe22c1db7f799285b6979187498fc6829a3
--- /dev/null
+++ b/out_tensor/model.layers.31.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d694173cfcb812550128667bfb0ffea183ed080161b0cdefd651fa45ecfe4ed
+size 44286680
diff --git a/out_tensor/model.layers.31.mlp.up_proj.safetensors b/out_tensor/model.layers.31.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4bd0486c2cd624ae4ba746a0697ece50a70c3181
--- /dev/null
+++ b/out_tensor/model.layers.31.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:223f1ee81edb1b707c55bb5277f1952af66c6441f21947ad886e838abc7966e0
+size 44286672
diff --git a/out_tensor/model.layers.31.self_attn.k_proj.safetensors b/out_tensor/model.layers.31.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bbb59b9e2f32656d3f84eedb6a294dc1aa38ef0f
--- /dev/null
+++ b/out_tensor/model.layers.31.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d39bf9c618cc9ee437207b3b9b19b8e07a3e20012ecc1ac81911db843394d633
+size 2757920
diff --git a/out_tensor/model.layers.31.self_attn.o_proj.safetensors b/out_tensor/model.layers.31.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3f7f7cc8737e3e045aaf5473e6d7c193ff8ee890
--- /dev/null
+++ b/out_tensor/model.layers.31.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:891ef4c1c587463d9b4d1bf301c6efc0cf7260aaa4e398871001c6821bfa0a68
+size 10978600
diff --git a/out_tensor/model.layers.31.self_attn.q_proj.safetensors b/out_tensor/model.layers.31.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8189e3da33fde7baf95a2c89eeadf03abc356e32
--- /dev/null
+++ b/out_tensor/model.layers.31.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b22388b4a45b86d9f06d1d4210b6ab1b2719a94705500a644355eb3fa49062cd
+size 10978600
diff --git a/out_tensor/model.layers.31.self_attn.v_proj.safetensors b/out_tensor/model.layers.31.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..56af03ac79f218b193368f5d36d737eabfff919f
--- /dev/null
+++ b/out_tensor/model.layers.31.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1211e5acfc3170c1e17cac6088d6347b1b7f6efddbeb210b1c371dea124144c
+size 3228960
diff --git a/out_tensor/model.layers.32.mlp.down_proj.safetensors b/out_tensor/model.layers.32.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..893abdbea09069b18c9bbfe41a17fee2e957c4a8
--- /dev/null
+++ b/out_tensor/model.layers.32.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34056c54f2a5ff27d3db19ee10d66753a41735cde962af056da940223a40e497
+size 45118764
diff --git a/out_tensor/model.layers.32.mlp.gate_proj.safetensors b/out_tensor/model.layers.32.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..54531f687118039001cf85d6739e5c71453003ac
--- /dev/null
+++ b/out_tensor/model.layers.32.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3df7cdeb15d7a792b483df638bf0dc9b0098b60491d7bf2d61fecd7180861469
+size 44286680
diff --git a/out_tensor/model.layers.32.mlp.up_proj.safetensors b/out_tensor/model.layers.32.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..474b06db501c8e8cd6c103ba4901cc0331e31ccf
--- /dev/null
+++ b/out_tensor/model.layers.32.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c02c19256961877b0bb51ab5248ae20e1caebc5f5584f4ff5dc522416b093f43
+size 44286672
diff --git a/out_tensor/model.layers.32.self_attn.k_proj.safetensors b/out_tensor/model.layers.32.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..929f4aea92de30d064036690126152338c209e94
--- /dev/null
+++ b/out_tensor/model.layers.32.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4055a29eb64a27ff39f86584913d940144bb8e8bb1449cd6a0965da5476eb602
+size 2757920
diff --git a/out_tensor/model.layers.32.self_attn.o_proj.safetensors b/out_tensor/model.layers.32.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b1a089753915e459851676b9978b4090a81e9726
--- /dev/null
+++ b/out_tensor/model.layers.32.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c567cca6bc5cd83516297acce44b4c36e83869e9c017f86e3b19b3fa277a49b
+size 10978600
diff --git a/out_tensor/model.layers.32.self_attn.q_proj.safetensors b/out_tensor/model.layers.32.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..48fbee15ecc2e27fe0ac287e354080349e245aaa
--- /dev/null
+++ b/out_tensor/model.layers.32.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:899248af347106d7359732c47e407e9423e0e20b2b3d1bce249d11600245da55
+size 10978600
diff --git a/out_tensor/model.layers.32.self_attn.v_proj.safetensors b/out_tensor/model.layers.32.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d1d95999993d6a7120b4db97f1fe01adb4e32c6e
--- /dev/null
+++ b/out_tensor/model.layers.32.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:510e97636bbb413be0e6d9702769e752ef80e60f4e1a124afdf07a161959301c
+size 3228960
diff --git a/out_tensor/model.layers.33.mlp.down_proj.safetensors b/out_tensor/model.layers.33.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ddb6bd69a45ac08ce8796fb31cf20aa1f3e4712e
--- /dev/null
+++ b/out_tensor/model.layers.33.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e10314e4981f961c7e83895f82bfc657117fa7a6f76705f063277bebdea0dd73
+size 45118764
diff --git a/out_tensor/model.layers.33.mlp.gate_proj.safetensors b/out_tensor/model.layers.33.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a6a85d40a32eb989f514d6b4cddc2b50b6420a7a
--- /dev/null
+++ b/out_tensor/model.layers.33.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ceabe2507c0b58b73647d8e0742cad04d5a291164797a5f9a8de46dfd6a73ec
+size 44286680
diff --git a/out_tensor/model.layers.33.mlp.up_proj.safetensors b/out_tensor/model.layers.33.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..863c40cd411a91e1c5882453ee717558f45f945c
--- /dev/null
+++ b/out_tensor/model.layers.33.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07e3dc2547019cdadea13cbe5b21fdb34c646f6a8741706ca0f5224077f71bf0
+size 44286672
diff --git a/out_tensor/model.layers.33.self_attn.k_proj.safetensors b/out_tensor/model.layers.33.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..de942c70ff4112822f4e51beb19a7e251901338a
--- /dev/null
+++ b/out_tensor/model.layers.33.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b92ebf805a5c7a2891665f93878e330df4490e9a916cb1a6f2ccb556953ce8d
+size 2757920
diff --git a/out_tensor/model.layers.33.self_attn.o_proj.safetensors b/out_tensor/model.layers.33.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9ad73980b224c8cd928b10034eec4e13a775a50e
--- /dev/null
+++ b/out_tensor/model.layers.33.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4a5f75afa5bcd57d9985021d57796bba41a4e47e5747369e91978fed805aec0
+size 10978600
diff --git a/out_tensor/model.layers.33.self_attn.q_proj.safetensors b/out_tensor/model.layers.33.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..00ae277d2efd7083cba2311d644bc9016a69e976
--- /dev/null
+++ b/out_tensor/model.layers.33.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0522902d974024a7b8ecddbfd82d5ded46672bf00266ac3a458a22b0b3a2f8b5
+size 10978600
diff --git a/out_tensor/model.layers.33.self_attn.v_proj.safetensors b/out_tensor/model.layers.33.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..61ca27a9f9f956c141387b4af314ad630185b7f9
--- /dev/null
+++ b/out_tensor/model.layers.33.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0ac370c6ae28c5d6740944e73dfab607a473564deea59c8cfab46d824cd3e57
+size 3228960
diff --git a/out_tensor/model.layers.34.mlp.down_proj.safetensors b/out_tensor/model.layers.34.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a22a5a83bc7ebd18e69afd7740a7f7cd22288963
--- /dev/null
+++ b/out_tensor/model.layers.34.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb0034df4f678229b37180832ea3f69e2f1ee41cea236ce76f7a35799c01b975
+size 32582816
diff --git a/out_tensor/model.layers.34.mlp.gate_proj.safetensors b/out_tensor/model.layers.34.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0886e8d6b4c0cc50593f31e5c7fe91c9b8165f51
--- /dev/null
+++ b/out_tensor/model.layers.34.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:186ac3817f983de480efc0f250b6075b41decd348ac8047a625f04eb65bd584d
+size 31040800
diff --git a/out_tensor/model.layers.34.mlp.up_proj.safetensors b/out_tensor/model.layers.34.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa2131606bc45bbba030a247f9f4f36e9fd37e10
--- /dev/null
+++ b/out_tensor/model.layers.34.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb4620768979f5b768bd30cda14391a6cdfc47a21baddf2c0ca0d430cfd8b793
+size 32130320
diff --git a/out_tensor/model.layers.34.self_attn.k_proj.safetensors b/out_tensor/model.layers.34.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6590542045b9f79603f7e1762e9678edf897ff74
--- /dev/null
+++ b/out_tensor/model.layers.34.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:905065f5ca594de0e837fc2c75bc1a36cf2388d72cfba0a388257714718c2158
+size 1671904
diff --git a/out_tensor/model.layers.34.self_attn.o_proj.safetensors b/out_tensor/model.layers.34.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f6ab778443a06380eabd653ebb653f8b7a317bee
--- /dev/null
+++ b/out_tensor/model.layers.34.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32ef39ea15c08c5864607e47eee31edf333d8b3407097cb2d24fa5417ae5ea93
+size 6636256
diff --git a/out_tensor/model.layers.34.self_attn.q_proj.safetensors b/out_tensor/model.layers.34.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b620d30191834faaf8ab5a0457d97438d78459a2
--- /dev/null
+++ b/out_tensor/model.layers.34.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07d4572afb10664187ff4e21c96032b352a8af363cc79a4e4732f8ed62f6aa40
+size 6636256
diff --git a/out_tensor/model.layers.34.self_attn.v_proj.safetensors b/out_tensor/model.layers.34.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4aef0880a750517650df167b5ef2b09e0139de9
--- /dev/null
+++ b/out_tensor/model.layers.34.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:724e717caacc3ed26f552c64ebd6711dfdd1fe6455b9e086e7514af3df3e2373
+size 1680288
diff --git a/out_tensor/model.layers.35.mlp.down_proj.safetensors b/out_tensor/model.layers.35.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5f3a4d021a236b35967ae88517ca9b810f12202d
--- /dev/null
+++ b/out_tensor/model.layers.35.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdde1d6828f9428da184f6315d8a22c8aa908df8e51a7b86d6738a45d488e80f
+size 39546016
diff --git a/out_tensor/model.layers.35.mlp.gate_proj.safetensors b/out_tensor/model.layers.35.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e2624d2d1ddd50a837e39bbc1029a7de680ed277
--- /dev/null
+++ b/out_tensor/model.layers.35.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28d96922fc4fe8ec206bc4bcee7c7edae9964d40c08832050b8663e0e2163076
+size 38380832
diff --git a/out_tensor/model.layers.35.mlp.up_proj.safetensors b/out_tensor/model.layers.35.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8f7028e2e6c10c9d6b94c7ba000a060b695c7cc3
--- /dev/null
+++ b/out_tensor/model.layers.35.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2c18c025d64aad1978953a797ac1d2efb070cafd044d8ba7c6aa4b433f33c42
+size 39470352
diff --git a/out_tensor/model.layers.35.self_attn.k_proj.safetensors b/out_tensor/model.layers.35.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..57a501311fa2f296b79b1d79cf9d9de1ec47d4b8
--- /dev/null
+++ b/out_tensor/model.layers.35.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1bea7c87fba7ca4ac078235b3f1a83f4cbc0b731aa4c0bdd344b9834d2aeaf7
+size 2233632
diff --git a/out_tensor/model.layers.35.self_attn.o_proj.safetensors b/out_tensor/model.layers.35.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5a956c4b7ca46365e042a435d92ea24cdd389644
--- /dev/null
+++ b/out_tensor/model.layers.35.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91d98d63b9123e138529a3e7a252f3ff44cb27a9b0b3a46a0df33d376069408f
+size 8881448
diff --git a/out_tensor/model.layers.35.self_attn.q_proj.safetensors b/out_tensor/model.layers.35.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4def26a24082372bddd99e521b0efab0f6c6b3aa
--- /dev/null
+++ b/out_tensor/model.layers.35.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e8e9734196220baacad69d43625cae35fbed3c9e15a821c16b7d74ad7e5ca5
+size 8881448
diff --git a/out_tensor/model.layers.35.self_attn.v_proj.safetensors b/out_tensor/model.layers.35.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c6824ba20c53660ad7d5b6d5742b67f0efd86e58
--- /dev/null
+++ b/out_tensor/model.layers.35.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f55236a821f54903260ade5ee2ac351117c566964596aab3cd7722b9ea5fe12
+size 2704672
diff --git a/out_tensor/model.layers.36.mlp.down_proj.safetensors b/out_tensor/model.layers.36.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5b291172823f50b7db8649df55f711fcbc02689b
--- /dev/null
+++ b/out_tensor/model.layers.36.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fa2fd2581733f14240c8e0ee086d48321970a2fea22d5b68a8ee6119e5b3cb9
+size 45118764
diff --git a/out_tensor/model.layers.36.mlp.gate_proj.safetensors b/out_tensor/model.layers.36.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..660766b294243e1d20a78e7ccd37b23026bd206a
--- /dev/null
+++ b/out_tensor/model.layers.36.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50f8b21f4a0603e3d9ea166279cbde384ba2cb7f1d46beedb5da92a46831013c
+size 44286680
diff --git a/out_tensor/model.layers.36.mlp.up_proj.safetensors b/out_tensor/model.layers.36.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7e42b7e44b8419e5e879e7cd942010797f02c910
--- /dev/null
+++ b/out_tensor/model.layers.36.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d41f6a591601bdafd8e0df160a804a4fd61a44809f5104a5f58e4548c238019b
+size 44286672
diff --git a/out_tensor/model.layers.36.self_attn.k_proj.safetensors b/out_tensor/model.layers.36.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..229ab2fdcb7c06ec975ee09e31a62ae3223dc70c
--- /dev/null
+++ b/out_tensor/model.layers.36.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1474632d4970601fc1791134e8bcbb310b018ba146815e001e2fd73c7a960519
+size 2757920
diff --git a/out_tensor/model.layers.36.self_attn.o_proj.safetensors b/out_tensor/model.layers.36.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..558619edec8d4d3bbd5589fab8d38358d432e107
--- /dev/null
+++ b/out_tensor/model.layers.36.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97d99de72e0034d356c81792bec798dddf135091c004e38eae452b646b2ad477
+size 10978600
diff --git a/out_tensor/model.layers.36.self_attn.q_proj.safetensors b/out_tensor/model.layers.36.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c3325dfbcbd0a2527c8890b9d4e0d7994dc3b04a
--- /dev/null
+++ b/out_tensor/model.layers.36.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65bd2f492e44e7675df9b6ef19d6e0e6c274a00f62839e20043688b8844e9d99
+size 10978600
diff --git a/out_tensor/model.layers.36.self_attn.v_proj.safetensors b/out_tensor/model.layers.36.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5ca25d0fab739fd6cb15e4062325379dafacc19d
--- /dev/null
+++ b/out_tensor/model.layers.36.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6d8fec3097aba37347bf265342665a8b1c16405ad07187b1715f9a76e703fd7
+size 3228960
diff --git a/out_tensor/model.layers.37.mlp.down_proj.safetensors b/out_tensor/model.layers.37.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0a55c6412eb53c5f9b82a005bed70df90842ed1a
--- /dev/null
+++ b/out_tensor/model.layers.37.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f0854e626733fd03fc76b66bb507be2e397dbb67ca1796125a1dd121b570ca3
+size 45118764
diff --git a/out_tensor/model.layers.37.mlp.gate_proj.safetensors b/out_tensor/model.layers.37.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..14c2b8fc3b49d88a5d529bde8edd97b3aad0b2d4
--- /dev/null
+++ b/out_tensor/model.layers.37.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5904c3c206e2126c25e88daacbab02573b3f97d8f95a918e9a5d9f30d1a2f70
+size 44286680
diff --git a/out_tensor/model.layers.37.mlp.up_proj.safetensors b/out_tensor/model.layers.37.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f5c29bf144c883a875e4388f9a4211fb5d8a3d95
--- /dev/null
+++ b/out_tensor/model.layers.37.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b371c8ade52633ba2be034a6303653db92167df43aedddcdfe71bc7a185d7be0
+size 44286672
diff --git a/out_tensor/model.layers.37.self_attn.k_proj.safetensors b/out_tensor/model.layers.37.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..db28f092b41d6884282070bcbc4eb4a07a482456
--- /dev/null
+++ b/out_tensor/model.layers.37.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b5e3b15d2c3125d7c36e1c8ed6ab9c1750698bef7ed7ccb8b32170b14a28274
+size 2757920
diff --git a/out_tensor/model.layers.37.self_attn.o_proj.safetensors b/out_tensor/model.layers.37.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8572cd3f868667c7a042bbd8dc7b95b9e523e43a
--- /dev/null
+++ b/out_tensor/model.layers.37.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c513463364f40382ad6846e4161653825fb9f439d394668ac8b2a2f6c173406
+size 10978600
diff --git a/out_tensor/model.layers.37.self_attn.q_proj.safetensors b/out_tensor/model.layers.37.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d55ea0e96777d6a78f0e0f7b7f4a230d098df47f
--- /dev/null
+++ b/out_tensor/model.layers.37.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10120bf6abcc354058d358c9189773613958bcffd6cc78b0e4cdc658d317a138
+size 10978600
diff --git a/out_tensor/model.layers.37.self_attn.v_proj.safetensors b/out_tensor/model.layers.37.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9d722724495de4447052662bb495b03d29ccdc0e
--- /dev/null
+++ b/out_tensor/model.layers.37.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22137636dfc97b36df30688bef6516ca8caf4b8bf94cf8418068e35cefa7e76a
+size 3228960
diff --git a/out_tensor/model.layers.38.mlp.down_proj.safetensors b/out_tensor/model.layers.38.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e351037d131b28745cdd62dda6286f00d80e681c
--- /dev/null
+++ b/out_tensor/model.layers.38.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03967703ae12705b94bac6fd4112de413fbb4b21f8ff0825efbbf1590464ab5f
+size 45118764
diff --git a/out_tensor/model.layers.38.mlp.gate_proj.safetensors b/out_tensor/model.layers.38.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3d29cf44ef1ea73652148e410afeb00895bb95a1
--- /dev/null
+++ b/out_tensor/model.layers.38.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44ccfbe705f45d4ea3fecffcc8bbbbd204c1f32ddbeb66190181e74e929e1d6f
+size 44286680
diff --git a/out_tensor/model.layers.38.mlp.up_proj.safetensors b/out_tensor/model.layers.38.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..29f6dfb386b390911a2b640f6081c52611ac9681
--- /dev/null
+++ b/out_tensor/model.layers.38.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f8ef8b5367b92663a5339cc5eaf52fac4d1a9065c76146cd69eeae79f92f242
+size 44286672
diff --git a/out_tensor/model.layers.38.self_attn.k_proj.safetensors b/out_tensor/model.layers.38.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2f24bab2347ae603274f5fed06339934589eb04a
--- /dev/null
+++ b/out_tensor/model.layers.38.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f233643733dcd04749445d8c539fc760f3491a882e7e76fe8f3835eb69212b53
+size 2757920
diff --git a/out_tensor/model.layers.38.self_attn.o_proj.safetensors b/out_tensor/model.layers.38.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b4d0e5ea60825b7cfef83c048fb64047d814cd4b
--- /dev/null
+++ b/out_tensor/model.layers.38.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:273ff57ca9f4f034f07600f5648d06ba530f0fe76af2e33e9a3ef434a4e70a05
+size 10978600
diff --git a/out_tensor/model.layers.38.self_attn.q_proj.safetensors b/out_tensor/model.layers.38.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0ca2ffa884393e24ae91a954b8b582ab38535d1d
--- /dev/null
+++ b/out_tensor/model.layers.38.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:929c29b63e70fbc26dd97b16c2dd37e1c7c733ff58bbb18725b52d3cbd238711
+size 10978600
diff --git a/out_tensor/model.layers.38.self_attn.v_proj.safetensors b/out_tensor/model.layers.38.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d2052254bb1eb4f869400f8a1b320c3d39d1eacc
--- /dev/null
+++ b/out_tensor/model.layers.38.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:721cd8ead0e71e4e7a527aac27b79c2862e1b0ec8b5f9a9dad29ce3c81647ed2
+size 3228960
diff --git a/out_tensor/model.layers.39.mlp.down_proj.safetensors b/out_tensor/model.layers.39.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1a0284ffc7a15752b4850bcd971abfe4d46d266d
--- /dev/null
+++ b/out_tensor/model.layers.39.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39a202f8576f94508f1ae082210d1a6f2236b70bd274a06c7605fe53b1b027f8
+size 45118764
diff --git a/out_tensor/model.layers.39.mlp.gate_proj.safetensors b/out_tensor/model.layers.39.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2641f648eeb603baa6629e30ab31567ac1ad8837
--- /dev/null
+++ b/out_tensor/model.layers.39.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:614152cffc0615ac6acf872f0a7eb24818b674f532d9fb0bcf6fce94cca56084
+size 44286680
diff --git a/out_tensor/model.layers.39.mlp.up_proj.safetensors b/out_tensor/model.layers.39.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e99741cd9a88f81104e87f13cbfff87d0bd269cd
--- /dev/null
+++ b/out_tensor/model.layers.39.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0657bed16a6a0d3cc21ccbf8320e48ba27fa725679ad2064e40f9cdb3d82f002
+size 44286672
diff --git a/out_tensor/model.layers.39.self_attn.k_proj.safetensors b/out_tensor/model.layers.39.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7ef72b57b7e553dccac812ddeaf5c72ec7e35fc2
--- /dev/null
+++ b/out_tensor/model.layers.39.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e90f4c9c7f6a2089ea46545ed05a82e45c8cf26ea2a0291e312f71756eb947f8
+size 3228960
diff --git a/out_tensor/model.layers.39.self_attn.o_proj.safetensors b/out_tensor/model.layers.39.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..17735b72d0432fa3c879f04fdfaf90416810f4c1
--- /dev/null
+++ b/out_tensor/model.layers.39.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:836ad907dc3abf76d22089ea8b18b7c3692f227d982af425388b43f6d849d0aa
+size 12862760
diff --git a/out_tensor/model.layers.39.self_attn.q_proj.safetensors b/out_tensor/model.layers.39.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..75ef7e74ad3cb93a0e237834df018b4f11d393cb
--- /dev/null
+++ b/out_tensor/model.layers.39.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aad6d6fa9b5cf4f1ad78ed46f1c9d0daacc4fa54135ecdad4ab9f17992fadab7
+size 12862760
diff --git a/out_tensor/model.layers.39.self_attn.v_proj.safetensors b/out_tensor/model.layers.39.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f6b861aa78498fbe00abbecab4c97c1fd4619748
--- /dev/null
+++ b/out_tensor/model.layers.39.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8a2d31557e2aa08f95cfa9f73d6333d9386fbf8977a9f3655d3df54eda12da8
+size 4277536
diff --git a/out_tensor/model.layers.4.mlp.down_proj.safetensors b/out_tensor/model.layers.4.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae1e5185025f58d17b37a5ec65ebb542c773aa25
--- /dev/null
+++ b/out_tensor/model.layers.4.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb699e8fb8f0d47a2b98616b62e5cdfa2be734be87f34772c3979c9e869a25bc
+size 18204576
diff --git a/out_tensor/model.layers.4.mlp.gate_proj.safetensors b/out_tensor/model.layers.4.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5c52de191748bf12537113e93d380bcc6d4121c1
--- /dev/null
+++ b/out_tensor/model.layers.4.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:538988e0b54e5c7fe1949b9940fbd56122dc13de0d54eec64a5c29328f149842
+size 15614872
diff --git a/out_tensor/model.layers.4.mlp.up_proj.safetensors b/out_tensor/model.layers.4.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b34d55282420698b244055662c299c9a43ff6e41
--- /dev/null
+++ b/out_tensor/model.layers.4.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ef8ac7d0c3f200b6b173355555d82c3b13b59dd9df0e9ca43766c15cbec9451
+size 15614856
diff --git a/out_tensor/model.layers.4.self_attn.k_proj.safetensors b/out_tensor/model.layers.4.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..22dada502a8138c20d98883fcecd42a97c9950cb
--- /dev/null
+++ b/out_tensor/model.layers.4.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e889f0fce7d51421b921439478dfb2a1d596861bd167e258f14fb482f73f4e6
+size 1155992
diff --git a/out_tensor/model.layers.4.self_attn.o_proj.safetensors b/out_tensor/model.layers.4.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..01c7cec22865a62ff36913c6db98ece1dc59a0f2
--- /dev/null
+++ b/out_tensor/model.layers.4.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d0cfe6b4e4ee28a86bb5faf4d8537f1ece1a43e93109d36a74606a33f786cfc
+size 4572064
diff --git a/out_tensor/model.layers.4.self_attn.q_proj.safetensors b/out_tensor/model.layers.4.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7b818ce0bf4f71933ec898f6ac02e708182fddd9
--- /dev/null
+++ b/out_tensor/model.layers.4.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:420349ce50ef49170e455d495e6cc137a29c84292884a896c78377c5a1b5d61a
+size 4572064
diff --git a/out_tensor/model.layers.4.self_attn.v_proj.safetensors b/out_tensor/model.layers.4.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..56d3d8bd1a63b239812893fde06a280e7fc4a6b1
--- /dev/null
+++ b/out_tensor/model.layers.4.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec714a3176e61914fd95c14d12bb30be54d2aec3624644f3b5c06c4683ec30fd
+size 1671896
diff --git a/out_tensor/model.layers.40.mlp.down_proj.safetensors b/out_tensor/model.layers.40.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..03d172a676d4f8c5169b95d50653c1c37f1742f5
--- /dev/null
+++ b/out_tensor/model.layers.40.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:289eea1f70b212f6ab5a8313572bd6cdc2f5c2cf99a42808cc1389a1ffae4e09
+size 32582816
diff --git a/out_tensor/model.layers.40.mlp.gate_proj.safetensors b/out_tensor/model.layers.40.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bdbabc8aae62b0fc13fc6afbb74eb602b36e3bd5
--- /dev/null
+++ b/out_tensor/model.layers.40.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b7c50680d31c197ecf250d4027c008ffa75a1e8da06cfc8f5fd397f45fc009c
+size 31040800
diff --git a/out_tensor/model.layers.40.mlp.up_proj.safetensors b/out_tensor/model.layers.40.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c8825d5c3c7cba1845b9b8a72e2d98bd60a45b4e
--- /dev/null
+++ b/out_tensor/model.layers.40.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9701e31cf0a6e3f4927b26b1e12a06f7c0d9d8a40305e1cdb91a0751f6621a4b
+size 32130320
diff --git a/out_tensor/model.layers.40.self_attn.k_proj.safetensors b/out_tensor/model.layers.40.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..24bf4211b34915b7842f4abee39f843edc6f9785
--- /dev/null
+++ b/out_tensor/model.layers.40.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ddaffd6fbba084c12f02b6199569e4c1aed8ce97aecc2db50bdce48a7537367
+size 2204576
diff --git a/out_tensor/model.layers.40.self_attn.o_proj.safetensors b/out_tensor/model.layers.40.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0e1ae4505c7765e188872275f65ce7518a1a1992
--- /dev/null
+++ b/out_tensor/model.layers.40.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1e5940b5e57bb32ef275e427b8137a00c17f5c48dfc385f0e329af672ee7838
+size 8766368
diff --git a/out_tensor/model.layers.40.self_attn.q_proj.safetensors b/out_tensor/model.layers.40.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..463d72570d84757ad01446414b3f67a889985aae
--- /dev/null
+++ b/out_tensor/model.layers.40.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:702d37c873c45ec265821d294a347c1c1fef5f969e5d0a7d161e053f0fc84fc4
+size 8766368
diff --git a/out_tensor/model.layers.40.self_attn.v_proj.safetensors b/out_tensor/model.layers.40.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b0b23999fe5c0f0498bc52870b3676a2b381224f
--- /dev/null
+++ b/out_tensor/model.layers.40.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9f8cc37257824d3a0da338d430106996b39c485ea767e9822d51baa0b8041f8
+size 2671520
diff --git a/out_tensor/model.layers.41.mlp.down_proj.safetensors b/out_tensor/model.layers.41.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7659151a977fbcb615ef1e1714449ee4fe9943c3
--- /dev/null
+++ b/out_tensor/model.layers.41.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be69bc7a067e762b3c5fda9ab76edf6c05f51f1ee01cf57c4f47d2201a27ccac
+size 39546016
diff --git a/out_tensor/model.layers.41.mlp.gate_proj.safetensors b/out_tensor/model.layers.41.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..703d32b0de46256c0171489022375033f46f0508
--- /dev/null
+++ b/out_tensor/model.layers.41.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ec37cbc61846c936bbb88175146fccf86a05d1fff7eda0f1cc6a9d583a3f95e
+size 38380832
diff --git a/out_tensor/model.layers.41.mlp.up_proj.safetensors b/out_tensor/model.layers.41.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9415115c3b17c7c902e0f25057a78b78a6b6e9be
--- /dev/null
+++ b/out_tensor/model.layers.41.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72c38b77c18d5b59c35d8728a5304a90c0a64f7810de6ded149c9dc636e9f636
+size 39470352
diff --git a/out_tensor/model.layers.41.self_attn.k_proj.safetensors b/out_tensor/model.layers.41.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..994db2a9620c4f5f124a3cfa91804c11df367f7a
--- /dev/null
+++ b/out_tensor/model.layers.41.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a6b0766094f9da8fda0ab7693c2d5e9f43fd70dfa89780f4eef88776ca82d5b
+size 2233632
diff --git a/out_tensor/model.layers.41.self_attn.o_proj.safetensors b/out_tensor/model.layers.41.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f1cc9a0ef7a006f7a2c70b4e965d460cbc445c39
--- /dev/null
+++ b/out_tensor/model.layers.41.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:141f38a4246199c4a015345e00357af7e50fb111218fe43856862c1f79d89818
+size 8881448
diff --git a/out_tensor/model.layers.41.self_attn.q_proj.safetensors b/out_tensor/model.layers.41.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5eea3611809f6eb929849c6cd3c04962f1f11b4a
--- /dev/null
+++ b/out_tensor/model.layers.41.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:471af9e47fe2b8f981b5dee85032378e8e7088f3543a9b7af09d90dab876fb93
+size 8881448
diff --git a/out_tensor/model.layers.41.self_attn.v_proj.safetensors b/out_tensor/model.layers.41.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..366eaffdd1687df0e0e482190d7a65fa020b6a96
--- /dev/null
+++ b/out_tensor/model.layers.41.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d09f538883d59d4549e09f348b3832bbd3528d27db51f5870085201407eb30b
+size 2704672
diff --git a/out_tensor/model.layers.42.mlp.down_proj.safetensors b/out_tensor/model.layers.42.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bd64c91d4bfe3a64c37a46889df7bad6d72609c0
--- /dev/null
+++ b/out_tensor/model.layers.42.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f09d121afcb17b748f57b66d29c8cf541448c849f58ddb083d0c1247cc00f18
+size 45118764
diff --git a/out_tensor/model.layers.42.mlp.gate_proj.safetensors b/out_tensor/model.layers.42.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..da033744872d99f830830e716bb767344bc9c7fe
--- /dev/null
+++ b/out_tensor/model.layers.42.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5d2e34e3eeeb6f5cf2e71206cff8493bfffaf4765f2ef1bdf280e4bf851fa74
+size 44286680
diff --git a/out_tensor/model.layers.42.mlp.up_proj.safetensors b/out_tensor/model.layers.42.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..58558fd8024b00951942dad316356371daae8d12
--- /dev/null
+++ b/out_tensor/model.layers.42.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce43a2544b38decc111c4ab60220eb9d652bd31a7a56fa987146e41442c3b28b
+size 44286672
diff --git a/out_tensor/model.layers.42.self_attn.k_proj.safetensors b/out_tensor/model.layers.42.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7e6d387b11ebeae901ed6bd44e304fbda1bb3944
--- /dev/null
+++ b/out_tensor/model.layers.42.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec537b2fb9b16b20c16e48c9134f7350fd6057c5cec96a382db27877f12eeafc
+size 3228960
diff --git a/out_tensor/model.layers.42.self_attn.o_proj.safetensors b/out_tensor/model.layers.42.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a59b7a9576e91d7c10a2afcdbc2b57c0eff9f4e1
--- /dev/null
+++ b/out_tensor/model.layers.42.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a29207bd47fc39953e9d66b8fff1906417a30e24633e35a4db1d72b8237e6cb7
+size 12862760
diff --git a/out_tensor/model.layers.42.self_attn.q_proj.safetensors b/out_tensor/model.layers.42.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..04f78ecdf2471634f556209250c7a763a15c1a5d
--- /dev/null
+++ b/out_tensor/model.layers.42.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b278464f7a23305cb128149bb7d5bab0b0c23fcd05a1b96149e4feea1f75125
+size 12862760
diff --git a/out_tensor/model.layers.42.self_attn.v_proj.safetensors b/out_tensor/model.layers.42.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f874d89597a6037bf8ea2c5f5cf4af5e2205c73c
--- /dev/null
+++ b/out_tensor/model.layers.42.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:255922b350f02a1dc90cf9d6020b3630631e1eef2bbe9c082d3d56437aac0ffb
+size 4277536
diff --git a/out_tensor/model.layers.43.mlp.down_proj.safetensors b/out_tensor/model.layers.43.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa30330c0ac7b497c1ac7129552961008832e402
--- /dev/null
+++ b/out_tensor/model.layers.43.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d285e485dc16686c62acc3faa5528d4bcf6616dad91bda458d75a6b247e8ec1b
+size 45118764
diff --git a/out_tensor/model.layers.43.mlp.gate_proj.safetensors b/out_tensor/model.layers.43.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..055716fee99ec17d6c5a118d42592edfb585015b
--- /dev/null
+++ b/out_tensor/model.layers.43.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6aa70cc40308f68d22647cbd3e7542203b97bc32fb3f62fef6bdb5c7379a0ea0
+size 44286680
diff --git a/out_tensor/model.layers.43.mlp.up_proj.safetensors b/out_tensor/model.layers.43.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..eb0c0345dfb6468d8d084b3458f700bc989f730e
--- /dev/null
+++ b/out_tensor/model.layers.43.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3bb059667e0e6e12d103cbc19b3123eff532231c921c7901d25359497424350
+size 44286672
diff --git a/out_tensor/model.layers.43.self_attn.k_proj.safetensors b/out_tensor/model.layers.43.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1452758ec57dcc96503c3218a8888b33bec73878
--- /dev/null
+++ b/out_tensor/model.layers.43.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b1a45584b76ab34947440f655966fd570818e33dd77344185847984db4510df
+size 3228960
diff --git a/out_tensor/model.layers.43.self_attn.o_proj.safetensors b/out_tensor/model.layers.43.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..94afddd8d77bc35345cc680ed17e1bafd83ac4f7
--- /dev/null
+++ b/out_tensor/model.layers.43.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99cb556906f0116b1e95179fa30f85781440a24a044312f5df6a558fa0010cc8
+size 12862760
diff --git a/out_tensor/model.layers.43.self_attn.q_proj.safetensors b/out_tensor/model.layers.43.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..379cf8ac3044c50a4f4c36241c09981e892f4cc0
--- /dev/null
+++ b/out_tensor/model.layers.43.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:802de5dac176f4d87e8273cba7f2ebaacc995e5a32292446bf36d85f22ef4d44
+size 12862760
diff --git a/out_tensor/model.layers.43.self_attn.v_proj.safetensors b/out_tensor/model.layers.43.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cf600f30752983caf80e3865690f3123bab0aeed
--- /dev/null
+++ b/out_tensor/model.layers.43.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4c10dfe088626a26c3abb7f43005de57c4fd7f2fd2afdbf296c58ec8ec8f4e
+size 4277536
diff --git a/out_tensor/model.layers.44.mlp.down_proj.safetensors b/out_tensor/model.layers.44.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f5b783b800f1a621b737a536d1a16258b0b13f98
--- /dev/null
+++ b/out_tensor/model.layers.44.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba890f8d7ad7566245c8edbae2e719a92f5ad31a94b2879b56d28258f73bb03f
+size 46556352
diff --git a/out_tensor/model.layers.44.mlp.gate_proj.safetensors b/out_tensor/model.layers.44.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1b0f3e57ed5a87c908655c0a5cf88e6f99d01faa
--- /dev/null
+++ b/out_tensor/model.layers.44.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9c3fc8652627f645f3ee582b11a1acd82c4f037146b4b1f71a8d2d0251d2d7a
+size 46121688
diff --git a/out_tensor/model.layers.44.mlp.up_proj.safetensors b/out_tensor/model.layers.44.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..edb0e4e3396e1f38a02a7bb63eab68d52320fb70
--- /dev/null
+++ b/out_tensor/model.layers.44.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ea437ae02c6bff2ccefb25a2e623645ebf2575c6c52e9e09ea348cdfb5c3eba
+size 46121680
diff --git a/out_tensor/model.layers.44.self_attn.k_proj.safetensors b/out_tensor/model.layers.44.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..19970304feb819e14ffd0215249f3d359cbdc2f8
--- /dev/null
+++ b/out_tensor/model.layers.44.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23c437fe6d48e0e2822359bdfbc03ece3bd40643cd07974e188f72b409b69f8d
+size 3228960
diff --git a/out_tensor/model.layers.44.self_attn.o_proj.safetensors b/out_tensor/model.layers.44.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f69262cbd18944da2bdef1a17584e75b1e98659e
--- /dev/null
+++ b/out_tensor/model.layers.44.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1fc788c2be633361d9f7fd056df8bf997a0c3deb4201ce4f59bd5139c9fbb21
+size 12862760
diff --git a/out_tensor/model.layers.44.self_attn.q_proj.safetensors b/out_tensor/model.layers.44.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..91a353f604ace54171faf32f5cc8048059d46dda
--- /dev/null
+++ b/out_tensor/model.layers.44.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7695e99373a506201e7f904df28eb2a83211d6d15f2fe9c0ce96e4fb2ed70e32
+size 12862760
diff --git a/out_tensor/model.layers.44.self_attn.v_proj.safetensors b/out_tensor/model.layers.44.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f06e9ee572202c54eccee03896a9cef4e9ca1fb9
--- /dev/null
+++ b/out_tensor/model.layers.44.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4876bc9aada3673e22e2875beafb0e9058954c342e3fa0e54cb5a874ac5be6ad
+size 4277536
diff --git a/out_tensor/model.layers.45.mlp.down_proj.safetensors b/out_tensor/model.layers.45.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b36a826a22af82477f5e3fc8576e661761fe3bd9
--- /dev/null
+++ b/out_tensor/model.layers.45.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a77b4c6efa44c46b804b168a573f6451d43942f2427a57934484bc36525f1c43
+size 45118764
diff --git a/out_tensor/model.layers.45.mlp.gate_proj.safetensors b/out_tensor/model.layers.45.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..74a934892bd7a1b2c42b65ebbf3162db05580ba5
--- /dev/null
+++ b/out_tensor/model.layers.45.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14964e7ac53913c4fefcf04550b3e0879ff8d0e6a55f31f6698e98a84e877f5b
+size 44286680
diff --git a/out_tensor/model.layers.45.mlp.up_proj.safetensors b/out_tensor/model.layers.45.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fef308fd602d6b2828393041b9061a18962cf2b4
--- /dev/null
+++ b/out_tensor/model.layers.45.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:272bc17ab23b0476d837988999182c40f774be3e9dcb36e48f6f3d6ca1699342
+size 44286672
diff --git a/out_tensor/model.layers.45.self_attn.k_proj.safetensors b/out_tensor/model.layers.45.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1d933fc96f334bf318183129f8898c1cf837295c
--- /dev/null
+++ b/out_tensor/model.layers.45.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efadd794475544c6498436cad52b75c4367d9674b31dbaf62cba63c4463081ce
+size 3228960
diff --git a/out_tensor/model.layers.45.self_attn.o_proj.safetensors b/out_tensor/model.layers.45.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b5027d1d6c4d3761a688d9402c44c46bc92c1128
--- /dev/null
+++ b/out_tensor/model.layers.45.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b19efe657e75c1e981439cdc7eb74cf07e7c861c78eecd5d417f501f5878ac4d
+size 12862760
diff --git a/out_tensor/model.layers.45.self_attn.q_proj.safetensors b/out_tensor/model.layers.45.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8c31a1de781b3f03066788d338569caf45467f3e
--- /dev/null
+++ b/out_tensor/model.layers.45.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:048f9508a3e4db5462c2ced31f99e47708e193f65de147a172f575f5e7c507ae
+size 12862760
diff --git a/out_tensor/model.layers.45.self_attn.v_proj.safetensors b/out_tensor/model.layers.45.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae44b56e8215ecc357ad6f468524e011fe4d46d1
--- /dev/null
+++ b/out_tensor/model.layers.45.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a645ba17846444f0b39e17a8a3ee1537d76e24320d31767de1888dc571ab506
+size 4277536
diff --git a/out_tensor/model.layers.46.mlp.down_proj.safetensors b/out_tensor/model.layers.46.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5b55d6d6891b17d30b26396605211921a666a8ec
--- /dev/null
+++ b/out_tensor/model.layers.46.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00f238c1b55dd1ce01762dbc638a85e77fb8d60c6492b662b24ef15e14b4aa2e
+size 25619616
diff --git a/out_tensor/model.layers.46.mlp.gate_proj.safetensors b/out_tensor/model.layers.46.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9feab3a97cc696b6664cd684a43e002606778625
--- /dev/null
+++ b/out_tensor/model.layers.46.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c50d23d02eff65b83be1ae034dce239c3fe9f061c77c6833af6df6b83a1c76d
+size 23700768
diff --git a/out_tensor/model.layers.46.mlp.up_proj.safetensors b/out_tensor/model.layers.46.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..33bfe9e483afe47779dc89f0109b365249ee329f
--- /dev/null
+++ b/out_tensor/model.layers.46.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0fe113f9a7914197cb650d1fa118750edf74c9d01b506a5dc918302d722b5cd
+size 24790288
diff --git a/out_tensor/model.layers.46.self_attn.k_proj.safetensors b/out_tensor/model.layers.46.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f1a0b33dfa7ebb9b200f0bedcab851b4324e1dd1
--- /dev/null
+++ b/out_tensor/model.layers.46.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:101c5a941bca88cf7b7f4da3e7388460a73f92bea0929c4c730a5ecfec54c017
+size 1156000
diff --git a/out_tensor/model.layers.46.self_attn.o_proj.safetensors b/out_tensor/model.layers.46.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4494b03a62770327b9fa62a596ca9e1c82ed082a
--- /dev/null
+++ b/out_tensor/model.layers.46.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:322c0c46d1a3f218b7492b33d981f7e946f16844b34d52709be30bd41e7a795a
+size 4572064
diff --git a/out_tensor/model.layers.46.self_attn.q_proj.safetensors b/out_tensor/model.layers.46.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49c5304ff69d1cbd6039a17413982db2212aa8b9
--- /dev/null
+++ b/out_tensor/model.layers.46.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fa88cb5d43389150456f2e8572025e4fcd8085be8c6aea20098717e1630b881
+size 4572064
diff --git a/out_tensor/model.layers.46.self_attn.v_proj.safetensors b/out_tensor/model.layers.46.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a277499759014aeb9fb5a363d64f0fe19fc640da
--- /dev/null
+++ b/out_tensor/model.layers.46.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:344704c4e11e76b81fc9c21bcc52c7d3ec0e962174e0de96a78b707d77fb0cf3
+size 1671904
diff --git a/out_tensor/model.layers.47.mlp.down_proj.safetensors b/out_tensor/model.layers.47.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4c7b9718fd493e68c47e010a54f55dc1d2d4504f
--- /dev/null
+++ b/out_tensor/model.layers.47.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdf6fa25da14ca8b00457eea773dd170a1ce4b4426636ecf29fb203bef5b2197
+size 25619616
diff --git a/out_tensor/model.layers.47.mlp.gate_proj.safetensors b/out_tensor/model.layers.47.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..521d01924fe333e874001e101217dc9fe428e825
--- /dev/null
+++ b/out_tensor/model.layers.47.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73a72c460ecb7c6f5308e63d69c9f5e754f7a5022e55563f4f35b5508679b970
+size 23700768
diff --git a/out_tensor/model.layers.47.mlp.up_proj.safetensors b/out_tensor/model.layers.47.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..06ffb28f3ab1030bdc8e8579de85940e3ce65b9a
--- /dev/null
+++ b/out_tensor/model.layers.47.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c623a394a40f5d966f316c69cf6bfc4790da99934419f247f7682876a2b30524
+size 24790288
diff --git a/out_tensor/model.layers.47.self_attn.k_proj.safetensors b/out_tensor/model.layers.47.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..70fda4a85e7180d5ac33e970eaa6c04ef0929d0e
--- /dev/null
+++ b/out_tensor/model.layers.47.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9639194447f48b48a7374de6d08e26d46f7155240f2fae8ddc7bffa7b5e76d3f
+size 2204576
diff --git a/out_tensor/model.layers.47.self_attn.o_proj.safetensors b/out_tensor/model.layers.47.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fd73a34aa8f933801961990f3551f299e4dbf746
--- /dev/null
+++ b/out_tensor/model.layers.47.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4e63c5119bd9c22ce1987c957e5a799cf4404e68fe5a9b407c5dc9452009d41
+size 8766368
diff --git a/out_tensor/model.layers.47.self_attn.q_proj.safetensors b/out_tensor/model.layers.47.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f58643bb72ecd409f0925fafac9f1fc056d39759
--- /dev/null
+++ b/out_tensor/model.layers.47.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a572992ebc5e4d9c4a015f18d7a90df38e506f4b014ae3e1d86daf9aedcc5d88
+size 8766368
diff --git a/out_tensor/model.layers.47.self_attn.v_proj.safetensors b/out_tensor/model.layers.47.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1295f9a5935eb2a43d39407bf261977bfc43c72a
--- /dev/null
+++ b/out_tensor/model.layers.47.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e693cf67c1b2c3aef8277359174e841317f75a3f7be25064eeb5fbb53fe4362
+size 2233632
diff --git a/out_tensor/model.layers.5.mlp.down_proj.safetensors b/out_tensor/model.layers.5.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e751b4ff6b1b410c5e1bd73e02d7de2dccefc3b6
--- /dev/null
+++ b/out_tensor/model.layers.5.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:374f531bb60d39ccb0600964fbcdab2eb2b053a44f698bf6c724e225cf5838d8
+size 25619608
diff --git a/out_tensor/model.layers.5.mlp.gate_proj.safetensors b/out_tensor/model.layers.5.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1597a54df05a036a9b08d690f2c21ffae590eabf
--- /dev/null
+++ b/out_tensor/model.layers.5.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a95ecb864e3f127629188ad8dc640813bb88b8f8fe1c3b1a6ce77f114cdeeb5
+size 23700760
diff --git a/out_tensor/model.layers.5.mlp.up_proj.safetensors b/out_tensor/model.layers.5.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..877756b0de61109e0821973f0a3bf9c1c3084651
--- /dev/null
+++ b/out_tensor/model.layers.5.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66b196a41d9bc70fb7e7840755d6c3ee232290465a55220add0ae6774ff90341
+size 24790288
diff --git a/out_tensor/model.layers.5.self_attn.k_proj.safetensors b/out_tensor/model.layers.5.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..88ff409a003881360ad8cd20ef5f1a63c231e366
--- /dev/null
+++ b/out_tensor/model.layers.5.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0802593d0dabfec0fc5c4d8830d8297d332f01ee47cc398746f695f366ff3309
+size 2204568
diff --git a/out_tensor/model.layers.5.self_attn.o_proj.safetensors b/out_tensor/model.layers.5.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6902d6430e3985cb3694ef96863f1322fbbec10d
--- /dev/null
+++ b/out_tensor/model.layers.5.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:588805bbf12e3f8450064de50be2a48e6bb156770fca76a6a97badcf4928d915
+size 8766368
diff --git a/out_tensor/model.layers.5.self_attn.q_proj.safetensors b/out_tensor/model.layers.5.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..41f093ab5aa5549c2b45c2df527d2e0cd0b6dcf5
--- /dev/null
+++ b/out_tensor/model.layers.5.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a0f0160e8ac8959a146eef02422e3dc2213e50b1c369ed9960a3b1e1bf89229
+size 8766368
diff --git a/out_tensor/model.layers.5.self_attn.v_proj.safetensors b/out_tensor/model.layers.5.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4b3dfbf8039499f44de725b12353d84ab5e1845c
--- /dev/null
+++ b/out_tensor/model.layers.5.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8e54dd5be8e2297c7775b6fbe3116ba94c7887cad3a27adee139480604e2871
+size 2233632
diff --git a/out_tensor/model.layers.6.mlp.down_proj.safetensors b/out_tensor/model.layers.6.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..16ac7242a1dfe4f6018e9b638e5eba71161a7fbb
--- /dev/null
+++ b/out_tensor/model.layers.6.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:822262ba77f7876ebf5b1cf9cae83b8e225bd8e060fb007a3e45a9d0265a136d
+size 32582808
diff --git a/out_tensor/model.layers.6.mlp.gate_proj.safetensors b/out_tensor/model.layers.6.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..51af2f25b4e4a8a5971f1665b8f95dcf247e2ce4
--- /dev/null
+++ b/out_tensor/model.layers.6.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d04d6b9b8c0a4da8acf2b9083e0cd9d2febd54516c605b85ecce818522fc0ac9
+size 31040792
diff --git a/out_tensor/model.layers.6.mlp.up_proj.safetensors b/out_tensor/model.layers.6.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c1f0e4470070bbdfc9834f9921dcdef72e0c56a0
--- /dev/null
+++ b/out_tensor/model.layers.6.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2739b667ba238f82f7f5b3ef2183afb479c4776188510b06145a407bf7e9d0
+size 32130320
diff --git a/out_tensor/model.layers.6.self_attn.k_proj.safetensors b/out_tensor/model.layers.6.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b3d04ada55009dbd174fa8698459781f711cb2ee
--- /dev/null
+++ b/out_tensor/model.layers.6.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:203ba0cd6e6a16d28e997fc2a413b446e543ed455d09fc80ec62a17ecc266455
+size 2233632
diff --git a/out_tensor/model.layers.6.self_attn.o_proj.safetensors b/out_tensor/model.layers.6.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e5b32685502baa8b32048b2bd501a487db78bd01
--- /dev/null
+++ b/out_tensor/model.layers.6.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1214a44c97a9d0f54aba2395192cea12078936212c77226beade82b7876fe39
+size 8881440
diff --git a/out_tensor/model.layers.6.self_attn.q_proj.safetensors b/out_tensor/model.layers.6.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..25aa8f983ea717f90c311dcb2cf7994b1cec3e9b
--- /dev/null
+++ b/out_tensor/model.layers.6.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38b88baa0fae4d4cd82d5ed19cc49b1c4ac3a9bdd3b706d96051aacd3e4e5755
+size 8881440
diff --git a/out_tensor/model.layers.6.self_attn.v_proj.safetensors b/out_tensor/model.layers.6.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9beeee5aef9e1b96c237d6fb264cdb93a264bfd0
--- /dev/null
+++ b/out_tensor/model.layers.6.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ba723c8434524f2fe61f5adff02fa6822c331253c652c512269dae25cab31c7
+size 2704672
diff --git a/out_tensor/model.layers.7.mlp.down_proj.safetensors b/out_tensor/model.layers.7.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3d4bc56d309aa409b76afb8dff11f7d009295531
--- /dev/null
+++ b/out_tensor/model.layers.7.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e311a597385c28d0d848bffef2e8ba2952a2f304a0ad72e8b726025fed83ea2d
+size 32582808
diff --git a/out_tensor/model.layers.7.mlp.gate_proj.safetensors b/out_tensor/model.layers.7.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..35394681dc424685a2874bb6833404d35b8f3d17
--- /dev/null
+++ b/out_tensor/model.layers.7.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3abeb45eaa906807666acbb54f81504647ab94bfccd18bdd260bee115b0a1ecd
+size 31040792
diff --git a/out_tensor/model.layers.7.mlp.up_proj.safetensors b/out_tensor/model.layers.7.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..81cafe2984f1224e6eee40d06f6eb184af2c7839
--- /dev/null
+++ b/out_tensor/model.layers.7.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f801d41c159fbe9605bae1ed74fb542eadc9f293c79f23b8e27b6ee9f35ff73
+size 32130320
diff --git a/out_tensor/model.layers.7.self_attn.k_proj.safetensors b/out_tensor/model.layers.7.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0af29fc414c05da352f8ff18c4710d61f967677b
--- /dev/null
+++ b/out_tensor/model.layers.7.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e9deb21193bc175a20cff6e94474cf09915fd9a28d7e8e2c085220e683bd30d
+size 2233632
diff --git a/out_tensor/model.layers.7.self_attn.o_proj.safetensors b/out_tensor/model.layers.7.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0f9448540fbcef3a0bfcaa9bc918849fa295dd73
--- /dev/null
+++ b/out_tensor/model.layers.7.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ac02b53cf22625baa74caf1d68911eb5a6646dc6758f39aae2b32e0f6153b60
+size 8881440
diff --git a/out_tensor/model.layers.7.self_attn.q_proj.safetensors b/out_tensor/model.layers.7.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2bf03db51c86f953f192ce493076222e26eabebb
--- /dev/null
+++ b/out_tensor/model.layers.7.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fcfd38bf8fec9104f7f407a47607b02184f7ad410534168c13e7f24f4bdff5c
+size 8881440
diff --git a/out_tensor/model.layers.7.self_attn.v_proj.safetensors b/out_tensor/model.layers.7.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e91476a48e2334714ee8bfe09d154ac7be49995f
--- /dev/null
+++ b/out_tensor/model.layers.7.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25233a78e868e615f76520eecf227f438dbb311129d77da80f6d84006088e616
+size 2704672
diff --git a/out_tensor/model.layers.8.mlp.down_proj.safetensors b/out_tensor/model.layers.8.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8827e3892c2c1d557550fd3bb48e87eb817ff765
--- /dev/null
+++ b/out_tensor/model.layers.8.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f525ee3b464b7211677b6d5ba92f7eca9b9f3b4c9f9e08aa40420171875e1b5
+size 32582808
diff --git a/out_tensor/model.layers.8.mlp.gate_proj.safetensors b/out_tensor/model.layers.8.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..526befa19be5a7747b0fb5263a1f4837cb55387b
--- /dev/null
+++ b/out_tensor/model.layers.8.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfc295662cd2dc2ae64ca8bcb72bccd88208e9028ce392ee5300575855a85a8a
+size 31040792
diff --git a/out_tensor/model.layers.8.mlp.up_proj.safetensors b/out_tensor/model.layers.8.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3a0d79c072f430a417c5873aed2dc411e7952121
--- /dev/null
+++ b/out_tensor/model.layers.8.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bef4cfe7763545a9675fd1837ecb868a18733c76eda295e11c5e54f5cf99763
+size 32130320
diff --git a/out_tensor/model.layers.8.self_attn.k_proj.safetensors b/out_tensor/model.layers.8.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..53c84103176df4a6989f5ad97f801dbcdbb1d0c4
--- /dev/null
+++ b/out_tensor/model.layers.8.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf50402462bc4ba5c1bfb09a0ed9b0cecb8365cfecdaaff18a03eb332d01f5e3
+size 2233632
diff --git a/out_tensor/model.layers.8.self_attn.o_proj.safetensors b/out_tensor/model.layers.8.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ab0079cb602e40898c395badcc62097ca4a707ce
--- /dev/null
+++ b/out_tensor/model.layers.8.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0933b3c0f18c8ed7ad6166dfddd3e10f853cc0150d076a6877326f452e8558a3
+size 8881440
diff --git a/out_tensor/model.layers.8.self_attn.q_proj.safetensors b/out_tensor/model.layers.8.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8e0e4ce918c6bf7d4235d1b49ffff873f790ef38
--- /dev/null
+++ b/out_tensor/model.layers.8.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81298380c96f86a9e5668b246bb2a0e51d920b39fbdae0ade3ebd3939b37086e
+size 8881440
diff --git a/out_tensor/model.layers.8.self_attn.v_proj.safetensors b/out_tensor/model.layers.8.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..922a617f0162e46ce7ccb14e664b6e140bc8a94b
--- /dev/null
+++ b/out_tensor/model.layers.8.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be9ed40454625098d23a1c7bf3ae6a8e359eac783774c8629e9480d7756488de
+size 2704672
diff --git a/out_tensor/model.layers.9.mlp.down_proj.safetensors b/out_tensor/model.layers.9.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5327736ec04a3015593dece3b8cd12ef614d7b25
--- /dev/null
+++ b/out_tensor/model.layers.9.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd33887421f014e0aa4bb814e6c2335a467657b8bf4c070d8a91edeb733c29eb
+size 39546008
diff --git a/out_tensor/model.layers.9.mlp.gate_proj.safetensors b/out_tensor/model.layers.9.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4c4b8111bf6769380ea503681c2e7f1a6c5b9a6e
--- /dev/null
+++ b/out_tensor/model.layers.9.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2672ebac14e65731cff906f6d0890bcb7319742725a6f2aaa6d81593cd90a98b
+size 38380824
diff --git a/out_tensor/model.layers.9.mlp.up_proj.safetensors b/out_tensor/model.layers.9.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..25fd476fca7725cc851a2634a4217bcf0e8b42f5
--- /dev/null
+++ b/out_tensor/model.layers.9.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83e4ec129a56ab38e4c567a64d27e0b4274b450884cf0a2cd70140933e78b0c6
+size 39470352
diff --git a/out_tensor/model.layers.9.self_attn.k_proj.safetensors b/out_tensor/model.layers.9.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..abfac48ad32ed71a2769889a5b5a5d836fca3b0d
--- /dev/null
+++ b/out_tensor/model.layers.9.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1401de4f6332eff29ef946d06d5aab9a3bb08e38fe73a101d7f895cc9ba9da8
+size 2233632
diff --git a/out_tensor/model.layers.9.self_attn.o_proj.safetensors b/out_tensor/model.layers.9.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..330a6b4e37f808b67d63873d6b2e91b33d958261
--- /dev/null
+++ b/out_tensor/model.layers.9.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:458458345481ac4477056c7ffed08b2a943fa02326d85626b52869edffe8253c
+size 8881440
diff --git a/out_tensor/model.layers.9.self_attn.q_proj.safetensors b/out_tensor/model.layers.9.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e328a1dd0f624c0f51a4c73956a15b5cf4198b10
--- /dev/null
+++ b/out_tensor/model.layers.9.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65d55d0239503e5deea774763fb8278063a89d9700b1f169b1304fc0114c15dd
+size 8881440
diff --git a/out_tensor/model.layers.9.self_attn.v_proj.safetensors b/out_tensor/model.layers.9.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3d24f153c31ddc03780acf020b21b75520f9e51e
--- /dev/null
+++ b/out_tensor/model.layers.9.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5628059882379abee19190dfa33f31d4c18a33f10a0ccc67f707d9f3c6f549e4
+size 2704672
diff --git a/output.safetensors b/output.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c825be5791e0e68a2d6163dc73894fe86a4c2ea2
--- /dev/null
+++ b/output.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3963c904a7c408b7a538423634ec2741c70b1e4c718ea8cf7f1b6ac3399d99f
+size 6909731928
diff --git a/rubra-11b-h.png b/rubra-11b-h.png
new file mode 100644
index 0000000000000000000000000000000000000000..b3d1fae01bcb58bd6515628004b9ee6079f5533a
--- /dev/null
+++ b/rubra-11b-h.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e273bdc068c7f7065c1c5eba64e19ac2b4da620a59a966265026713902ca905
+size 1735652
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..492d4b2966a1763442d426d880dbc29f94906e4c
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed358013d41334780151bb4ab85d8770bb7316da
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,46 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{{ '' + system_message }}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '' }}{% elif message['role'] == 'function' %}{{ '<>' + content + '' }}{% elif message['role'] == 'observation' %}{{ '[INST] <>' + content + ' [/INST]' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}