{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.01182393915951252, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.010113885626196861, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.005040150135755539, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.005290213041007519, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.005290108732879162, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0020977903623133898, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.011094657704234123, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.010029289871454239, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.005559930112212896, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.004889913368970156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.00508610624819994, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.005270845722407103, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.004888507071882486, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.002876731799915433, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.002196687739342451, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0027975954581052065, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.00199302239343524, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.0017052155453711748, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0019418542506173253, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.0016394125996157527, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0018547893268987536, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.001941824913956225, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0014603257877752185, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0016206668224185705, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.01182393915951252, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.004888507071882486, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.011938773095607758, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.01024575810879469, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.005020024720579386, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.005249407608062029, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.005249103531241417, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.001953608589246869, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.011431971564888954, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.01013721153140068, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.005509513430297375, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.004816411063075066, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.005018702242523432, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0052352542988955975, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.004813939332962036, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.002773058833554387, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.002012677723541856, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.002732795663177967, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.0017733253771439195, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0014462930848821998, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0017127664759755135, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0013642358826473355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0017086609732359648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0017120464472100139, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0012790884356945753, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0013395249843597412, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.011938773095607758, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.004813939332962036, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.11996563524007797, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07152383029460907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04374569281935692, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.04960933327674866, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04957224428653717, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.021957075223326683, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.08039216697216034, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06578350812196732, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.05650609731674194, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.031937774270772934, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.038324009627103806, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04359197989106178, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0316188707947731, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.024117659777402878, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.02197743020951748, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02234594151377678, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.012836852110922337, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.011825651861727238, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00953544583171606, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.007870473898947239, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.011457061395049095, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.009481536224484444, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006577099207788706, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006215796805918217, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.012836852110922337, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.006215796805918217, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11966709792613983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07601750642061234, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.048712849617004395, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.051081232726573944, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.049200281500816345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.02457805722951889, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07664486765861511, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06832914054393768, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05676627904176712, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03397602587938309, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03648901358246803, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03927142918109894, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.03306364268064499, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02460542507469654, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02219315804541111, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01984984241425991, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01424134150147438, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013266882859170437, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01177523098886013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.01024183165282011, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.011014753952622414, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.011647836305201054, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007921118289232254, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009040821343660355, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013266882859170437, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007921118289232254, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.12732891738414764, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.11806099116802216, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.11527976393699646, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.10467647761106491, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.057236261665821075, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.054526034742593765, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0639176145195961, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.05897137150168419, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0580424889922142, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05192605406045914, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.04957941174507141, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03246914967894554, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.02830331400036812, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.027578813955187798, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.02741367742419243, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.016359621658921242, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.014904339797794819, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.014826133847236633, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01394427940249443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.013844535686075687, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.00915810838341713, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.009995196014642715, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.00889102928340435, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007770108059048653, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01394427940249443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007770108059048653, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.15027090907096863, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14131589233875275, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13856488466262817, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.12587518990039825, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06799335777759552, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06534895300865173, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07535744458436966, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06958407908678055, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.06876865029335022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06212745979428291, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0591157004237175, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03799208998680115, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0329744890332222, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03230734542012215, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03215133398771286, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.018947338685393333, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.016574716195464134, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01650262624025345, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.015435338951647282, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.015337426215410233, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.009952424094080925, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.009936218149960041, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.009684094227850437, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.006564699113368988, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.009952424094080925, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.006564699113368988, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.0868382379412651, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.07363718003034592, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.06627893447875977, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.05825028195977211, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.03847834840416908, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.03263307362794876, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.05275072902441025, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.04565807804465294, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.04023352637887001, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.031580861657857895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.030570138245821, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.025635894387960434, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.02190837450325489, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.019139239564538002, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.018451320007443428, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.01333101000636816, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.011159395799040794, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.010912278667092323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.010021329857409, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.009652565233409405, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.00815831869840622, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.008620751090347767, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.007410453166812658, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.007208097260445356, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.01333101000636816, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.007208097260445356, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.02249637432396412, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.01484830491244793, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.008372031152248383, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.00890374556183815, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.008659861981868744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.00372491218149662, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.015479489229619503, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.014042426832020283, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.01050356775522232, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.006822916213423014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.007325408514589071, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0078184949234128, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.006691112648695707, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.004354950040578842, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.003596351481974125, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.003931586164981127, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.0025271361228078604, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.002222125418484211, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0022245533764362335, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.0017786244861781597, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.002121053636074066, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0022044451907277107, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0012855012901127338, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0016171201132237911, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.008372031152248383, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.004354950040578842, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.019816355779767036, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.013353170827031136, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.007194305770099163, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.007660371251404285, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.007483340334147215, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0030094338580965996, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.014418169856071472, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.012788355350494385, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.009156235493719578, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0061274184845387936, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.006657695863395929, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.007215922698378563, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.00603488739579916, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.003759856568649411, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.0029906940180808306, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0036235281731933355, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.002160861389711499, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0018423802684992552, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0019332922529429197, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.001473527168855071, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0019308619666844606, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0019187966827303171, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0010849728714674711, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0013495741877704859, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.013353170827031136, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.00603488739579916, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.14521704614162445, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09427900612354279, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06690874695777893, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06415142863988876, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.05911368876695633, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03346116840839386, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.08790292590856552, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.07974012196063995, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.06893956661224365, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04130319505929947, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.04251083359122276, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04490135610103607, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0380859449505806, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02872639335691929, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.026111803948879242, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02245294116437435, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.015241612680256367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.014141545630991459, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01188777294009924, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.00989469513297081, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.011612964794039726, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.011323701590299606, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.007669506128877401, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007297936826944351, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.014141545630991459, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007297936826944351, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.16304950416088104, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1316402554512024, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.11799882352352142, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.09727711230516434, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.07390543073415756, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.06048121303319931, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09520379453897476, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.084551602602005, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.07749654352664948, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.05510447919368744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.05124340206384659, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.049014899879693985, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04116075485944748, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.0364266075193882, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.03523475304245949, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02473546378314495, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.020279835909605026, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.01974773034453392, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.016963165253400803, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.016210200265049934, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.013784454204142094, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01482389122247696, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.011979911476373672, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011712285690009594, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.013784454204142094, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011712285690009594, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17462044954299927, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16452957689762115, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16151753067970276, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14592763781547546, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08089292794466019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07759340107440948, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0896354466676712, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08253666758537292, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08167408406734467, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07333747297525406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06957373023033142, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04592854157090187, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.040033627301454544, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03930816799402237, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03914543613791466, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023182911798357964, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021494779735803604, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021405288949608803, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020125456154346466, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.020034687593579292, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013116713613271713, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014661027118563652, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012822739779949188, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011702976189553738, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013116713613271713, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011702976189553738, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.21093058586120605, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.19939343631267548, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1959734559059143, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.17771200835704803, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09807807207107544, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.09434375911951065, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10825344920158386, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0998704805970192, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09901032596826553, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08915417641401291, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08459556847810745, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.055247802287340164, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.048074398189783096, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.047327496111392975, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04713457077741623, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02765531837940216, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.025227338075637817, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02514626272022724, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023558905348181725, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.023451244458556175, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015100331977009773, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01644865609705448, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01478053443133831, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012463399209082127, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012463399209082127, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012463399209082127, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.020802104845643044, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.020399026572704315, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.005358131602406502, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.004844418726861477, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.004041637759655714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.0026767782401293516, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.02060789428651333, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.019972553476691246, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.004159925971180201, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0036516916006803513, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.003607576945796609, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0035591593477874994, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0033245806116610765, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.0026048063300549984, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.0018978192238137126, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0024776372592896223, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.0015568311791867018, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.0009644622332416475, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0015058449935168028, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.0008736991439945996, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.0015405685408040881, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.001466308138333261, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.0007267043692991138, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0007514580502174795, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.005358131602406502, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.004844418726861477, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.05884005129337311, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.0451032891869545, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.03776656836271286, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.03372221440076828, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.025100361555814743, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.018579212948679924, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0353214368224144, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03206360340118408, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.02768515981733799, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.019602369517087936, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.01928938925266266, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.017895102500915527, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.015295583754777908, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.012173682451248169, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.011334790848195553, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.008957099169492722, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.006477863993495703, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.0061059496365487576, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.005434473976492882, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.00485975481569767, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.004675903357565403, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004675403703004122, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.003444688394665718, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0031487294472754, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.012173682451248169, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.004675403703004122, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06150631234049797, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.04516737535595894, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.035476114600896835, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.03243042528629303, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.025551553815603256, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.01726830191910267, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.037788283079862595, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.034374888986349106, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.028753137215971947, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.019624054431915283, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.01972116157412529, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.019101137295365334, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01633821241557598, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.012450468726456165, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.011353784240782261, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009545758366584778, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.0066859545186161995, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.006194797344505787, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.005580513272434473, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.004821136128157377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004981100559234619, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005014663562178612, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.003469625720754266, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003370691556483507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.012450468726456165, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.004821136128157377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.17437806725502014, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1426936239004135, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.13005676865577698, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.11376912146806717, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.07783699780702591, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.06448757648468018, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.09769425541162491, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.0884263888001442, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.08287769556045532, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.061606667935848236, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.05838631093502045, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.04957081750035286, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04225122183561325, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.03747893124818802, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.03623846918344498, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02486451342701912, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.0193284023553133, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.018796298652887344, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.016045309603214264, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.015180500224232674, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012993808835744858, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012426929548382759, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.010915819555521011, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00786642637103796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.012993808835744858, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00786642637103796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.1726834774017334, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1506815254688263, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1403118073940277, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.12141824513673782, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.07878422737121582, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.06916943937540054, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09913215041160583, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08954830467700958, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.08237724006175995, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06592198461294174, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.06196640804409981, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0515018031001091, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04372792690992355, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.0388932041823864, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.03769968822598457, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02629859186708927, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02178324945271015, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.021261045709252357, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01949843019247055, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.018781105056405067, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015128536149859428, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01597810722887516, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.013443466275930405, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012741008773446083, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.013443466275930405, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012741008773446083, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2076948881149292, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.19491584599018097, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1911005824804306, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.17202304303646088, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09717115014791489, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.09275217354297638, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10792756825685501, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09949140250682831, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0983724519610405, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08718264847993851, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08233066648244858, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05513118952512741, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.047727737575769424, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.046705253422260284, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04647357016801834, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.027585633099079132, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.024448558688163757, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02434370294213295, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02249581553041935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02234688214957714, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014846807345747948, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01534898579120636, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.014432431198656559, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010968023911118507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010968023911118507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010968023911118507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2454940676689148, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23073787987232208, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22630853950977325, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20385362207889557, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11487749218940735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10972610116004944, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1272697150707245, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11743690073490143, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11624275892972946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10312299430370331, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0973350778222084, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06482360512018204, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.056088656187057495, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.054946351796388626, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05467710644006729, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03236578032374382, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.0282101072371006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028091037645936012, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025840673595666885, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025673512369394302, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017091883346438408, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016915403306484222, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01661084219813347, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011170610785484314, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011170610785484314, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011170610785484314, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.18705697357654572, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.16264401376247406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1534045934677124, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.13342152535915375, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.0839143916964531, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.07477465271949768, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.10008563101291656, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.09134984761476517, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.08701024204492569, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0697527676820755, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.06533204019069672, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.050800859928131104, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04370687156915665, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.0404052659869194, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.03958882391452789, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02552192658185959, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.021403122693300247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.021109675988554955, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.01871515065431595, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.018200676888227463, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.013857456855475903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.014178423210978508, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.012722913175821304, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010245686396956444, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.013857456855475903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010245686396956444, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.04571445658802986, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.03755614906549454, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.032934267073869705, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.028938360512256622, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.020150573924183846, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.01621631532907486, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.027448760345578194, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.024934129789471626, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.021607665345072746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.016345983371138573, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.015799688175320625, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.013951371423900127, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.011912166140973568, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.009783122688531876, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.009219776839017868, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.00699281133711338, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.005218513775616884, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.004958693869411945, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.004493608605116606, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.004120040684938431, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0037078321911394596, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.003714032005518675, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0029028374701738358, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0025599978398531675, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.013951371423900127, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.004493608605116606, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.0467170886695385, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.037228040397167206, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.031231161206960678, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.027565548196434975, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.02008243091404438, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0152354771271348, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.028961453586816788, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.026226932182908058, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.021860547363758087, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.016134141013026237, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.015898460522294044, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01467193104326725, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.012487504631280899, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.009714988991618156, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.008949670009315014, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.007343110628426075, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.00511582475155592, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0047499751672148705, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00434912322089076, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0038258121348917484, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0038145966827869415, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0036902124993503094, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0027324180118739605, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0023570461198687553, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.012487504631280899, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00434912322089076, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.18432527780532837, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.15399032831192017, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.14192764461040497, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.12375128269195557, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.08265623450279236, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.07031679153442383, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10174277424812317, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09324871003627777, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.08741117268800735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.06664137542247772, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.06285255402326584, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05169158801436424, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04448793828487396, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.0397176668047905, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.038524556905031204, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02581859938800335, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.020355161279439926, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01983688212931156, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.017091356217861176, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.016270574182271957, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013309330679476261, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.012865151278674603, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.011302012018859386, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007971511222422123, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013309330679476261, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.007971511222422123, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.17371094226837158, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1519382745027542, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1438230574131012, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.12127795815467834, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.07977032661437988, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.07097547501325607, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.09485816210508347, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08629491180181503, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.08230303972959518, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06518993526697159, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.05919366329908371, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.04879438504576683, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.041882604360580444, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.03898634389042854, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.03827330470085144, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.024665793403983116, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.021320423111319542, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.021011410281062126, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.018653713166713715, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.018212350085377693, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0137765659019351, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014866748824715614, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.012685753405094147, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011613276787102222, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0137765659019351, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011613276787102222, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.21623985469341278, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2026878148317337, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.19858992099761963, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1789373755455017, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10146931558847427, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.0966973826289177, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11272519081830978, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10401047021150589, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.10279270261526108, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09091772884130478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08571838587522507, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05749158933758736, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04982426017522812, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04868917912244797, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04841920733451843, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.028742417693138123, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.025280065834522247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.025162577629089355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.023179316893219948, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.023010751232504845, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015301146544516087, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015583192929625511, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.014831957407295704, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010784960351884365, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010784960351884365, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010784960351884365, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.25372418761253357, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23806919157505035, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23336909711360931, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21047449111938477, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11915434151887894, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11364208161830902, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13219910860061646, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12201908230781555, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12066348642110825, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10686163604259491, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10087291151285172, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06746562570333481, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.058353159576654434, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05708632245659828, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.0567818246781826, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03372420370578766, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02935979701578617, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02922873944044113, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026877356693148613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026689089834690094, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017878975719213486, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01768440008163452, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017345480620861053, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011750929057598114, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011750929057598114, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011750929057598114, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.20717249810695648, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18219558894634247, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17289553582668304, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.15158459544181824, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09365303069353104, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08429841697216034, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1107480451464653, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10113703459501266, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.09674964100122452, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07874701172113419, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07404869049787521, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.056192513555288315, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04837727174162865, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.045018717646598816, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04420444369316101, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.02823393978178501, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.023698629811406136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.023398742079734802, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.020832795649766922, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.020306222140789032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.015305561013519764, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015440743416547775, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014166099950671196, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010960178449749947, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014166099950671196, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010960178449749947, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.06110944226384163, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.05087149888277054, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.04477160423994064, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.03959675878286362, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.027085402980446815, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0220224279910326, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.037222910672426224, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03357170894742012, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.028934478759765625, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.022322574630379677, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.021630067378282547, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.018960297107696533, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.016076821833848953, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.01319345273077488, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.012433103285729885, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.009529889561235905, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.007116909604519606, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.006764759309589863, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.006220567971467972, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.005734972655773163, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005091416649520397, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005143340211361647, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.003993410617113113, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0036631617695093155, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.01319345273077488, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005091416649520397, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06029275059700012, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0486941821873188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.041747547686100006, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.03702644258737564, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.026072530075907707, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.020365364849567413, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.037235841155052185, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.033360935747623444, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.02835194207727909, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02127429097890854, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.020856253802776337, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.01893676072359085, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.015957754105329514, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.012665302492678165, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.011769150383770466, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.009525531902909279, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.00675187399610877, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.00635529262945056, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00583498552441597, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.005253225099295378, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.004996396601200104, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004893038421869278, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.003716981504112482, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003322293283417821, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.012665302492678165, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.004893038421869278, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.1996086984872818, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.16960355639457703, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.15813235938549042, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.13850639760494232, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09017794579267502, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.07812946289777756, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10947313159704208, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10026904940605164, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.0948696881532669, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07385332882404327, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.06955187022686005, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05575942248106003, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04799393564462662, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04336375370621681, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.042230017483234406, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02786031737923622, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.022356975823640823, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.021866681054234505, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019080527126789093, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.018289916217327118, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014457089826464653, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014211796224117279, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01253074873238802, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009128919802606106, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014211796224117279, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009128919802606106, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.177953839302063, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.15673410892486572, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.14916014671325684, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.12705132365226746, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.08161676675081253, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.07342343032360077, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0977221429347992, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08791422098875046, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.08415968716144562, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06788162142038345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.06254886090755463, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.050389740616083145, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.04282595217227936, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.03999956697225571, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.039329130202531815, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.025547293946146965, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.021944886073470116, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.0216367170214653, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.019404398277401924, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.018988387659192085, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014323413372039795, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01534169353544712, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01326841488480568, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012061351910233498, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01326841488480568, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012061351910233498, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.19519874453544617, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.181936576962471, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1775345802307129, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1596507728099823, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09147679805755615, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.0865762010216713, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10282744467258453, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0947105884552002, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09297698736190796, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08154723048210144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07691708207130432, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05243732035160065, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04541226103901863, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.04395914822816849, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04361913725733757, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0262746699154377, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.022846195846796036, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02269110642373562, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.020860128104686737, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.020643094554543495, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014032622799277306, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014164062216877937, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.013447749428451061, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009768357500433922, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014032622799277306, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009768357500433922, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2543099522590637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23770830035209656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23239488899707794, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20934216678142548, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11944035440683365, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11338459700345993, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13339877128601074, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12304950505495071, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12122387439012527, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.1067228615283966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10063808411359787, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06796099245548248, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05887816101312637, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05724426731467247, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05686335265636444, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03398125246167183, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02939792536199093, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.029227860271930695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026811009272933006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026567980647087097, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017878927290439606, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01768524758517742, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017191460356116295, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01160223688930273, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01160223688930273, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01160223688930273, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.21698768436908722, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19408051669597626, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1857122778892517, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1641385406255722, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09882299602031708, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09037678688764572, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11593566834926605, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1054561659693718, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10158717632293701, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08468915522098541, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07965472340583801, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.058901142328977585, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.050514381378889084, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.047537826001644135, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04681241512298584, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029617145657539368, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02504543773829937, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02477765455842018, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022327274084091187, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.021864905953407288, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01601511240005493, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01626022905111313, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014950213022530079, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01163895707577467, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01163895707577467, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01163895707577467, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.07165929675102234, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06165524199604988, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.05593080818653107, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.04942796379327774, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.03221387043595314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.027402153238654137, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04248568043112755, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.03851009160280228, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.03391056880354881, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.027103310450911522, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.026007646694779396, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.021533317863941193, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.018398115411400795, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.015621975064277649, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.01489446684718132, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010796038433909416, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.00829885434359312, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.007959287613630295, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007315029855817556, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.006850111298263073, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005749698728322983, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005768806207925081, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.004703111480921507, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004005253314971924, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.010796038433909416, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.004703111480921507, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06732826679944992, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05678737163543701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05031605064868927, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.044354356825351715, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.029742613434791565, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.024488231167197227, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.040506407618522644, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03683177009224892, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03165080398321152, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.024758312851190567, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.023942751809954643, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.020517803728580475, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.017529042437672615, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.014353216625750065, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.013502907939255238, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01026358362287283, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.007491075433790684, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.007085335440933704, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006509511265903711, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.005947005935013294, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005301887635141611, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005168434698134661, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.004071221686899662, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003306141123175621, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.013502907939255238, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005168434698134661, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.21379804611206055, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18819299340248108, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17868436872959137, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.15729166567325592, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09810527414083481, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08799300342798233, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11549636721611023, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10607273876667023, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10173361748456955, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08250764012336731, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07728415727615356, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05868062004446983, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.050623487681150436, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.047009117901325226, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04611654207110405, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02925410494208336, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.023933738470077515, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.023530930280685425, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020723169669508934, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02012336440384388, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015039880760014057, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014492387883365154, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013485131785273552, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008801857009530067, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013485131785273552, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008801857009530067, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.21297292411327362, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18971838057041168, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18127688765525818, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1574118435382843, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09841940551996231, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08868487179279327, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.114846371114254, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10499585419893265, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10089368373155594, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08251592516899109, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07600553333759308, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05914030969142914, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05059080570936203, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04756889492273331, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.046830061823129654, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02976205386221409, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.025159617885947227, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.024812448769807816, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022071857005357742, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.021609043702483177, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01622331701219082, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01646493747830391, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015001914463937283, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011931962333619595, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011931962333619595, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011931962333619595, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.20333696901798248, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.18967245519161224, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.18509703874588013, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.16659407317638397, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09537000209093094, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.09030450880527496, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1070079579949379, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09873612970113754, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.09685368090867996, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08503970503807068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08022856712341309, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.05455749109387398, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.0472639724612236, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.045740511268377304, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.045388225466012955, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.027293970808386803, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.023608656600117683, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.023440375924110413, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02152187190949917, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.021291986107826233, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.014428449794650078, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01440246868878603, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01381726935505867, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009647591039538383, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01381726935505867, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009647591039538383, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2578112483024597, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24132472276687622, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23605941236019135, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21276657283306122, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12125024944543839, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11518719047307968, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1355217695236206, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12493296712636948, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12297461926937103, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10846512764692307, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10246031731367111, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06901661306619644, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.059750691056251526, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.058094386011362076, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.057703301310539246, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03451832756400108, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.029807647690176964, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.029630891978740692, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027197271585464478, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02694573998451233, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018164049834012985, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01787649653851986, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017490029335021973, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011653538793325424, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011653538793325424, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011653538793325424, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22553662955760956, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20345379412174225, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19513528048992157, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17302630841732025, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10329249501228333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09494829177856445, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12076937407255173, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11004600673913956, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10595012456178665, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08925426006317139, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08412238955497742, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.061567626893520355, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.052701592445373535, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.049638781696558, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04890459403395653, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030942518264055252, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026012254878878593, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02573101595044136, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02328120730817318, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022811708971858025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016705995425581932, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016695784404873848, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015626713633537292, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011750303208827972, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011750303208827972, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011750303208827972, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.07078325748443604, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06052554026246071, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0539265051484108, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.047668106853961945, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.03169965744018555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.02634410187602043, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04340728372335434, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.039135150611400604, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.03349989652633667, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.026626138016581535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.025717943906784058, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.022044114768505096, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.018705077469348907, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.015387521125376225, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.014509898610413074, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.011056027375161648, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.008202355355024338, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.007773728109896183, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007229854818433523, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.0066633811220526695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005836642347276211, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005822157487273216, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.004555551800876856, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004006198141723871, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.011056027375161648, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.004555551800876856, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06671015173196793, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05589145049452782, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.04873385652899742, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.04303703457117081, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.029392462223768234, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.023607736453413963, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04142766073346138, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03734821453690529, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03139050677418709, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.024460522457957268, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.023798750713467598, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.020989807322621346, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01784129999577999, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.014228403568267822, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.013244968838989735, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01048140600323677, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.007486917078495026, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.007004563231021166, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0065175872296094894, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0058710407465696335, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005460573825985193, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00531101506203413, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.004070034250617027, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0034435058478266, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.014228403568267822, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00531101506203413, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.19427983462810516, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17113979160785675, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16157175600528717, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1421206146478653, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.08898446708917618, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.07945167273283005, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10698001086711884, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09792056679725647, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09228536486625671, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0750720426440239, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07052431255578995, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05426570400595665, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04676460474729538, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.042708225548267365, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04169633984565735, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02709202654659748, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.021845150738954544, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.021373972296714783, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019001362845301628, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.018326254561543465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013942328281700611, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01353471539914608, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.012238028459250927, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00837588869035244, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013942328281700611, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00837588869035244, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20254862308502197, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17899462580680847, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17018340528011322, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1491081416606903, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09362643957138062, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08355516940355301, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11101189255714417, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10054740309715271, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09626322239637375, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07833666354417801, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0730765238404274, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0571141391992569, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.048537008464336395, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.045393653213977814, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.044630806893110275, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.028670914471149445, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02416827343404293, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.023810837417840958, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02123044803738594, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.020733697339892387, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01550249382853508, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016065426170825958, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014225502498447895, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011805962771177292, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014225502498447895, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011805962771177292, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.18479111790657043, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17264804244041443, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16848449409008026, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.15175114572048187, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08672328293323517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08215029537677765, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09757345914840698, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08997303247451782, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08807037025690079, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07750823348760605, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07316307723522186, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.049704622477293015, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.043081462383270264, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.041626542806625366, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.041281867772340775, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02487391047179699, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021523868665099144, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021366465836763382, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019663529470562935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.019445881247520447, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013151513412594795, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013206087052822113, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012552278116345406, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00891180895268917, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013151513412594795, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00891180895268917, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24856357276439667, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23285378515720367, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22766636312007904, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.2052817940711975, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11695852875709534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11107286810874939, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13076379895210266, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12070640176534653, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11861914396286011, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10468284785747528, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.098770871758461, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06658247858285904, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05772581323981285, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05602790042757988, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05562766641378403, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03327344357967377, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02871515229344368, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028528202325105667, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026204951107501984, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02595360577106476, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017382314428687096, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017204126343131065, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016673503443598747, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011159183457493782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011159183457493782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011159183457493782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2244008183479309, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2044609785079956, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1971486508846283, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17596673965454102, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10327345132827759, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.0959274098277092, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12023209780454636, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10914537310600281, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10566668957471848, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09034476429224014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08571993559598923, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06119997426867485, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.052312158048152924, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04967930167913437, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04904327541589737, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030707241967320442, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026115769520401955, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.025870975106954575, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02361164428293705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023205654695630074, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016553910449147224, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01679942011833191, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015615178272128105, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011985579505562782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011985579505562782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011985579505562782, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.07576137781143188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06692712008953094, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0599372573196888, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.053006235510110855, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.034438662230968475, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.02909015864133835, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04769131913781166, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04282825067639351, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.03588069975376129, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.029637761414051056, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.028718234971165657, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02433890849351883, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.020613260567188263, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.016768721863627434, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.015734964981675148, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012214915826916695, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.009015089832246304, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.008500858210027218, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.00810418650507927, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.007470005191862583, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006483782082796097, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006527154706418514, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.005027649458497763, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004593140911310911, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012214915826916695, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004593140911310911, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06835738569498062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05958346650004387, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05194634199142456, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.045780014246702194, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.030642803758382797, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.024975067004561424, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.043887343257665634, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03969786316156387, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.032106395810842514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02618534117937088, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.025499455630779266, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.022210996598005295, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01902848482131958, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.014849710278213024, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.013687588274478912, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.011154954321682453, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.007859273813664913, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.007266296539455652, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.006999465636909008, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.006253535859286785, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0058029950596392155, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005700094625353813, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.004268042277544737, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003724700538441539, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.013687588274478912, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005700094625353813, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.20824971795082092, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18651430308818817, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1780882179737091, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1567002534866333, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09620370715856552, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08729250729084015, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11380533128976822, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10374925285577774, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.0990900993347168, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08204048871994019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07695772498846054, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0578162744641304, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04958279803395271, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04614802449941635, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04531310871243477, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.028878051787614822, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.023598095402121544, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02319580316543579, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020693378522992134, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02011427842080593, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01491178385913372, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014452720060944557, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013387841172516346, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009018320590257645, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013387841172516346, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009018320590257645, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2096768170595169, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18742741644382477, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1787184774875641, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15443669259548187, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0970696434378624, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.088035449385643, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11630701273679733, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1043296754360199, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09959102421998978, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08148594945669174, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07569915801286697, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.059892017394304276, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.050668276846408844, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.047423046082258224, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04662054032087326, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03022904321551323, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.025751210749149323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02538483776152134, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022824015468358994, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.022325197234749794, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01684741862118244, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017690658569335938, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01564769446849823, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013619785197079182, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013619785197079182, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013619785197079182, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.18330630660057068, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17161913216114044, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16775882244110107, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.15144982933998108, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08628100156784058, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08191301673650742, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09666949510574341, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0890713706612587, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08752629905939102, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07725824415683746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07301074266433716, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04937111213803291, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04275411739945412, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.041478533297777176, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04118655249476433, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02472095564007759, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021591125056147575, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02145962417125702, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019790129736065865, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.0196056105196476, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013187441974878311, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01342764776200056, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01267954520881176, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009339623153209686, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013187441974878311, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009339623153209686, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.25270769000053406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23718306422233582, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23213127255439758, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20962849259376526, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11920521408319473, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11346441507339478, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1332421600818634, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12264513969421387, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12088346481323242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10684843361377716, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10115757584571838, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06809327006340027, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05881370231509209, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05726276710629463, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.056905727833509445, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03408912569284439, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.029642578214406967, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.029475010931491852, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027160124853253365, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026922546327114105, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018155312165617943, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018154270946979523, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017515312880277634, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012356809340417385, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012356809340417385, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012356809340417385, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22281745076179504, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20260746777057648, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.19517375528812408, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1742454171180725, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10245916992425919, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09491972625255585, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11921212822198868, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1085653305053711, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10486335307359695, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08940982818603516, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08482707291841507, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06074293330311775, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05198337882757187, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04923829436302185, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.048579439520835876, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030454928055405617, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.025791853666305542, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02554280124604702, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02324376069009304, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022828159853816032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01637275703251362, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01648351177573204, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015416912734508514, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011616602540016174, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011616602540016174, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011616602540016174, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.07663390785455704, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.06729678064584732, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.061969026923179626, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.0542447529733181, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.03484340012073517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.030281931161880493, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04444344341754913, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04067894071340561, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.03626124933362007, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.029429277405142784, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.02789573185145855, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02257918193936348, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.019437991082668304, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.016820549964904785, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.016147315502166748, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.011301824823021889, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.008824445307254791, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.008496848866343498, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.007757207844406366, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.007326912600547075, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.005935332737863064, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.005939336493611336, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.004950241651386023, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004006674978882074, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.011301824823021889, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.004950241651386023, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.06967383623123169, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.05981316417455673, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.05416067689657211, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.04713644087314606, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03110651485621929, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.026293115690350533, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04081807658076286, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.03719741478562355, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.032671425491571426, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.02589137852191925, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.02467290125787258, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.020711800083518028, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.017798563465476036, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.01500774547457695, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.014288828708231449, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.010370035655796528, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.007847153581678867, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.007488916628062725, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00682095717638731, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0063340929336845875, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005394800566136837, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005333647597581148, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.004336868412792683, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0034846318885684013, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.010370035655796528, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005333647597581148, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.19718629121780396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17441478371620178, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1658068597316742, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.14481250941753387, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09043225646018982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08143828809261322, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10772613435983658, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09784074872732162, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09359274059534073, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07610971480607986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07104485481977463, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05482972413301468, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04681849479675293, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04341090843081474, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.042570438235998154, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027386188507080078, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.022337935864925385, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.021962158381938934, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01943698711693287, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.01887602172791958, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014165281318128109, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013908076100051403, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.012613567523658276, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00898503977805376, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014165281318128109, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00898503977805376, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20415081083774567, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1853303164243698, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1781286597251892, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15456734597682953, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09466741979122162, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08725745230913162, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11102845519781113, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10091730952262878, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09694501757621765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08092810958623886, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07501451671123505, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05701332166790962, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.048560477793216705, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04574577510356903, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.0450432226061821, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.028552640229463577, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.024094756692647934, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.023773303255438805, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021394744515419006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02096633054316044, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01526566967368126, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01563102751970291, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014194474555552006, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011183608323335648, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014194474555552006, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011183608323335648, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1719595193862915, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16104641556739807, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15738807618618011, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1418876051902771, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08101459592580795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07687937468290329, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09095340967178345, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08375483006238937, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08217555284500122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0724697932600975, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06852735579013824, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04651487618684769, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04016679897904396, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.038933370262384415, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.038639456033706665, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023277997970581055, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020236598327755928, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020103368908166885, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018533067777752876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.018352387472987175, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012400604784488678, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012555493041872978, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01190329808741808, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008691573515534401, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012400604784488678, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008691573515534401, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24580439925193787, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2304660975933075, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22546370327472687, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20338384807109833, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11596501618623734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11021559685468674, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12979088723659515, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1194620132446289, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11760922521352768, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10373742878437042, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0980876013636589, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06632795929908752, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.057210639119148254, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05564003065228462, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05526890233159065, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.033158909529447556, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028664520010352135, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02849491685628891, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026185013353824615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025949722155928612, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017488516867160797, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017377348616719246, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.0168243870139122, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011576886288821697, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011576886288821697, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011576886288821697, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.21731387078762054, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19852042198181152, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1916348785161972, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1713932305574417, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10003078728914261, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09298375248908997, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11606831103563309, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10573800653219223, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10220850259065628, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08776234835386276, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08316409587860107, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05913809314370155, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.050687871873378754, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.048108287155628204, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.047495871782302856, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.029762186110019684, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02528253197669983, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.025043467059731483, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022893624380230904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022507496178150177, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016214361414313316, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01624574325978756, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015328342095017433, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011582520790398121, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011582520790398121, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011582520790398121, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.09278218448162079, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08180535584688187, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07559406012296677, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.06630025804042816, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04226505011320114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.03693641349673271, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05400507524609566, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04905536770820618, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04391241818666458, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03583808243274689, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03403833881020546, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.027491126209497452, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02351582981646061, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.020470794290304184, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.019692042842507362, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013790715485811234, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01088782399892807, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.010508514009416103, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.00965287908911705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.00916210561990738, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0073632444255054, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007478870451450348, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.00621062982827425, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005285242572426796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013790715485811234, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005285242572426796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08285263180732727, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07214684039354324, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06585507094860077, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05751814693212509, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03734969347715378, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.031993281096220016, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04880589619278908, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04422490671277046, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03904319182038307, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03139251470565796, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.029963551089167595, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02479688636958599, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.021182095631957054, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.018009115010499954, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.017193498089909554, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012416135519742966, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.009417067281901836, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009017691016197205, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00824374333024025, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.0077127208933234215, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006478087045252323, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006368496920913458, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005257787182927132, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004196609370410442, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012416135519742966, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005257787182927132, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.20436225831508636, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18395502865314484, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1766839623451233, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1544664204120636, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09460783004760742, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0866323709487915, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1096624881029129, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10048218071460724, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.0970790758728981, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08039311319589615, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07487999647855759, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.055739715695381165, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.047975510358810425, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04531402513384819, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.044668566435575485, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.027802877128124237, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.023024359717965126, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.022717079147696495, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020078642293810844, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.01964428648352623, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014249822124838829, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013759315945208073, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013082075864076614, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00838172435760498, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013759315945208073, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00838172435760498, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2117622345685959, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19054892659187317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18256862461566925, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15702620148658752, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.098351389169693, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0898289903998375, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11482182145118713, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10463659465312958, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10072207450866699, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08208152651786804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07673614472150803, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05892273783683777, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05027461796998978, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04737437143921852, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04668872430920601, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.029466411098837852, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.024718349799513817, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02439885586500168, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02147454023361206, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.021017245948314667, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015594260767102242, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015709377825260162, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014468986541032791, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010879117995500565, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010879117995500565, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010879117995500565, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17775441706180573, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1659349650144577, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16192112863063812, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.14585724472999573, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08368734270334244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07917136698961258, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09426146745681763, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08667157590389252, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08497942239046097, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07461642473936081, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07061819732189178, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.048263490200042725, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.041665542870759964, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.040307484567165375, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03999324515461922, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024198632687330246, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021116716787219048, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02097570337355137, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019326018169522285, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01913241483271122, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013022852130234241, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013345146551728249, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012483811005949974, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009499437175691128, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013022852130234241, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009499437175691128, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24171990156173706, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2258741557598114, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2206742763519287, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19871562719345093, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11378085613250732, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10785142332315445, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1276850402355194, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11746238172054291, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11556465178728104, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10145917534828186, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09587214142084122, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06526020914316177, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05630524456501007, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05465017631649971, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.054265573620796204, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0326545424759388, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028277749195694923, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028097543865442276, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02578805759549141, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02553877979516983, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01733352057635784, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01733822375535965, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016641395166516304, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011768992058932781, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011768992058932781, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011768992058932781, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22854851186275482, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20860543847084045, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20105808973312378, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17928415536880493, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10533776134252548, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09784410893917084, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12270398437976837, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1117323637008667, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10776231437921524, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09220390021800995, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08705121278762817, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06230539456009865, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.053468599915504456, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05060698464512825, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04992946982383728, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0312519334256649, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026454122737050056, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02618780918419361, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02387845888733864, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023439720273017883, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01677832193672657, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016834521666169167, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01578553393483162, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011739114299416542, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011739114299416542, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011739114299416542, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.08467835187911987, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.07512733340263367, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.06996957212686539, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.06083293259143829, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.03878594934940338, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.034237008541822433, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.04845080524682999, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04424821585416794, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.0400330126285553, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.032716039568185806, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.030776286497712135, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0246156957000494, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.021133609116077423, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.01870180107653141, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.018086768686771393, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012315981090068817, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.009778768755495548, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.009462740272283554, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.008575868792831898, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.008169637061655521, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.006471308413892984, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.006460892967879772, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.005530382040888071, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.004339311737567186, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.012315981090068817, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.005530382040888071, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.07867087423801422, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.06902068108320236, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06392686814069748, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.055248986929655075, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.035655051469802856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.031109338626265526, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.045052725821733475, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.040937524288892746, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03694793954491615, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.029787931591272354, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.02806447073817253, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.022847980260849, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.01952511817216873, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.017132362350821495, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.016514526680111885, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01141403429210186, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.008872538805007935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.008558705449104309, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.007701626978814602, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.007291548419743776, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.005926358979195356, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.005795689765363932, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.004967074375599623, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.003727216739207506, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01141403429210186, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.004967074375599623, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.1926901638507843, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.17211154103279114, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.16458584368228912, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1426766812801361, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0891997218132019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08098369091749191, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.10468067973852158, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09503647685050964, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09156548976898193, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07495515793561935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.06949084252119064, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05326779559254646, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04542279243469238, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.0427224226295948, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04208236560225487, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.026573440060019493, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.021855995059013367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.021542847156524658, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.018953094258904457, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.018513638526201248, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013708220794796944, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013316018506884575, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.012440738268196583, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008408550173044205, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.013708220794796944, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008408550173044205, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20455799996852875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18450284004211426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1768423467874527, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15457850694656372, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09438113868236542, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08644340932369232, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1114288941025734, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10107547789812088, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09683844447135925, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08032740652561188, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07590904086828232, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05730337277054787, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.048688311129808426, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04566333070397377, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.044942501932382584, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.028780126944184303, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02420366182923317, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.023870259523391724, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021476641297340393, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02100600115954876, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01561242900788784, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01593616232275963, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014476628042757511, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011587193235754967, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011587193235754967, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011587193235754967, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17362293601036072, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16114428639411926, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1568058878183365, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1407066285610199, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08147209137678146, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07675617933273315, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09240593016147614, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08485118299722672, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08294297009706497, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07224799692630768, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06826575845479965, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04731670022010803, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04077637940645218, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03925776109099388, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.038895174860954285, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023724842816591263, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020593667402863503, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020432723686099052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018772320821881294, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01855411008000374, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01277695968747139, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013085003942251205, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01216914039105177, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0093267522752285, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01277695968747139, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0093267522752285, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2315446138381958, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21526162326335907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20973023772239685, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1881679743528366, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10875865072011948, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.1025737076997757, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12270377576351166, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11278586089611053, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11064136028289795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09637262672185898, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09090077877044678, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06266751140356064, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05404166877269745, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.0522131621837616, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05177979916334152, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031347353011369705, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.027006225660443306, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.026804497465491295, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024511262774467468, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.0242290236055851, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016566317528486252, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016585595905780792, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01580272614955902, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011209342628717422, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011209342628717422, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011209342628717422, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23438380658626556, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21428072452545166, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20696432888507843, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18450574576854706, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10849549621343613, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10091729462146759, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.126156285405159, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11444183439016342, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.11084960401058197, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09503480792045593, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08981114625930786, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06438127160072327, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.054938334971666336, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05222591012716293, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05157335475087166, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03233987092971802, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027427997440099716, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02718106284737587, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024804847314953804, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024397611618041992, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017465462908148766, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01759452000260353, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.0165035892277956, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012518996372818947, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012518996372818947, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012518996372818947, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.09552817046642303, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08451572060585022, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.07874596863985062, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.06835180521011353, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.043915390968322754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.03871854394674301, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05457484722137451, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.049765583127737045, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04534374177455902, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.036852553486824036, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.034612420946359634, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02774573490023613, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.023867551237344742, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.021242616698145866, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.020579036325216293, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013914024457335472, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.011246126145124435, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.010919025167822838, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.009874521754682064, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.009455539286136627, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.0073770796880126, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00757217314094305, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.006374829448759556, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005339829251170158, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.013914024457335472, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005339829251170158, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08294740319252014, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07298924773931503, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06699774414300919, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.05785764381289482, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03764741122722626, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03264612331986427, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.04845034331083298, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04399685189127922, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.03894568234682083, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03151831775903702, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.02976413071155548, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.024559631943702698, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.021011587232351303, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.018164604902267456, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.017440970987081528, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012288815341889858, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.00952378660440445, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009148157201707363, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008316783234477043, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.007838134653866291, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006457186304032803, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0064068445935845375, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005393500439822674, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004310837481170893, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012288815341889858, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005393500439822674, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.20183075964450836, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.1781705617904663, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1698230504989624, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1462443470954895, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09278282523155212, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08345423638820648, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1103423535823822, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.09953287988901138, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09567442536354065, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07709159702062607, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07134682685136795, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05629926174879074, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04757659137248993, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04447241127490997, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04372371733188629, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02812669798731804, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.022781478241086006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02244047075510025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019600452855229378, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.01907949335873127, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0145402317866683, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013980252668261528, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.012976938858628273, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0088723786175251, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.013980252668261528, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0088723786175251, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.21108229458332062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19005876779556274, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.17681114375591278, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1541566550731659, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.097969189286232, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08680369704961777, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12670205533504486, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11366315931081772, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10130452364683151, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08418203145265579, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07957428693771362, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0656885877251625, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05504106357693672, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04787912219762802, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04605815187096596, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032980017364025116, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.025893818587064743, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02503630705177784, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023281648755073547, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.0221713799983263, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01800036057829857, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018288230523467064, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015471134334802628, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013530955649912357, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013530955649912357, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013530955649912357, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16969425976276398, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1570584625005722, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15258361399173737, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13691453635692596, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07970043271780014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07481127232313156, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09092139452695847, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08328632265329361, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0811825543642044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07043885439634323, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06665033102035522, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04662138968706131, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04013776779174805, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.038510870188474655, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.0381336510181427, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.023423122242093086, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020406872034072876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.020236462354660034, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.0186056736856699, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01837555691599846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01273500919342041, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013242269866168499, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012089944444596767, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009727280586957932, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01273500919342041, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009727280586957932, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.22944000363349915, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21257364749908447, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20689357817173004, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.18562829494476318, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10755333304405212, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10120072215795517, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12189634889364243, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.111857108771801, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1095152199268341, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09506092965602875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08966603875160217, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.062037356197834015, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.053576674312353134, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.051627907902002335, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05116662010550499, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031054427847266197, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.026668108999729156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.026456033810973167, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02414744719862938, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.023850087076425552, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01629660464823246, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016346294432878494, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015462971292436123, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010955043137073517, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010955043137073517, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010955043137073517, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2334098368883133, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21363238990306854, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20634479820728302, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18372292816638947, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10825887322425842, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10084527730941772, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12583278119564056, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11422459781169891, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.1106802448630333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09479182213544846, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08958345651626587, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06434378772974014, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05489048361778259, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05218210443854332, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05153077468276024, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03235308825969696, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027470523491501808, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027225900441408157, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024854468181729317, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024447521194815636, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017460571601986885, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01770017482340336, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016475627198815346, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012692144140601158, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012692144140601158, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012692144140601158, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10160299390554428, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08942127972841263, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08238961547613144, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07157237082719803, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04652811586856842, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04049486666917801, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.060462962836027145, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.054045844823122025, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04825122654438019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.039064399898052216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03717150166630745, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.030894719064235687, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.025984108448028564, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.022589603438973427, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.021732337772846222, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015584755688905716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012100047431886196, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011669524945318699, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.0106781255453825, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010121315717697144, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008370728231966496, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008401090279221535, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0069987038150429726, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006029206328094006, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012100047431886196, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006029206328094006, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08996304869651794, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07856966555118561, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07197257876396179, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.062232855707407, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04079296067357063, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.035189855843782425, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.053012292832136154, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04779043048620224, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04238472133874893, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03403954952955246, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03224969282746315, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.02700509876012802, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.022907262668013573, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.01973978616297245, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.018928760662674904, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013564810156822205, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010430746711790562, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010021647438406944, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009123587049543858, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008600402623414993, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007151286583393812, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0071337418630719185, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005913819186389446, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004911921452730894, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013564810156822205, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004911921452730894, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.20265789330005646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18095169961452484, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17256693542003632, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.14989110827445984, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09399119764566422, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08502562344074249, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11173354834318161, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.101039819419384, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09671266376972198, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.07901827991008759, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0736115351319313, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05698500573635101, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.04833054170012474, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04507095739245415, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04429042339324951, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.02847878821194172, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02304607257246971, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02266652137041092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.019970860332250595, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.01943327486515045, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.014701801352202892, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014078161679208279, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013205565512180328, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008812300860881805, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014078161679208279, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.008812300860881805, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2246474325656891, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20071209967136383, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19192662835121155, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1678597629070282, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10409149527549744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09456215053796768, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12315598875284195, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11127868294715881, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10710018128156662, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0884542167186737, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08227900415658951, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06351328641176224, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.053784582763910294, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05052255094051361, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.049738623201847076, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03195398673415184, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.026866720989346504, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02651912346482277, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023831382393836975, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02332327887415886, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017430230975151062, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017760755494236946, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.016181495040655136, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013035259209573269, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013035259209573269, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013035259209573269, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.17523561418056488, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16182415187358856, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15721750259399414, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1409795880317688, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08237217366695404, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07720571011304855, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09408127516508102, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08590130507946014, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08392319828271866, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07258748263120651, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06882674992084503, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04826272651553154, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04151781275868416, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.039932139217853546, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.0395621582865715, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02432212047278881, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021356096491217613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021193131804466248, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019479840993881226, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.019255176186561584, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013465715572237968, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01410670392215252, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01284234318882227, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010634157806634903, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013465715572237968, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010634157806634903, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23761877417564392, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21989469230175018, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21408575773239136, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19175559282302856, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.1116868108510971, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10498233139514923, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.126115620136261, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11585769802331924, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11372964084148407, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09841864556074142, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09272666275501251, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06453534960746765, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.055576980113983154, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05369340255856514, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05325556546449661, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03226939216256142, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02789316326379776, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.027690036222338676, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025236323475837708, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.024947062134742737, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017121220007538795, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01731475628912449, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01633518747985363, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011919337324798107, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011919337324798107, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011919337324798107, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23736630380153656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21591782569885254, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.2071407437324524, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18443697690963745, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10975740104913712, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10121563822031021, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12957462668418884, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11759329587221146, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.1123829185962677, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09561984986066818, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.09053365141153336, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06604725867509842, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0564567893743515, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05297122523188591, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05211979150772095, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03325427696108818, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02801697328686714, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027658071368932724, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025285370647907257, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024740900844335556, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018054643645882607, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01833474263548851, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01681228168308735, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0132166538387537, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0132166538387537, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0132166538387537, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10201451182365417, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09121198952198029, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08601713180541992, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07485536485910416, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04712952300906181, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04224482923746109, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05734407529234886, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05221952125430107, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.0484195202589035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.03977479040622711, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03733285143971443, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.02913772128522396, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.024956658482551575, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02266852557659149, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.022098030894994736, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014577312394976616, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.011788740754127502, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011505691334605217, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01033010333776474, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.009955366142094135, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007633948232978582, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007595743052661419, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.006705347914248705, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0050819870084524155, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.011788740754127502, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0050819870084524155, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08694787323474884, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07762018591165543, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07244136929512024, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06306643784046173, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03991314396262169, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03536822274327278, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.049778882414102554, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04528821259737015, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04104830324649811, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.0337459035217762, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03183447942137718, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.025262106209993362, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02166152186691761, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.019217707216739655, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.018599847331643105, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012630021199584007, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.009983046911656857, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009672245942056179, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.008764388039708138, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008361616171896458, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006607601419091225, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0065216501243412495, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005669732578098774, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004304510075598955, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012630021199584007, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005669732578098774, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.21451833844184875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19177280366420746, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1837092936038971, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1591620147228241, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.09920569509267807, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09026478976011276, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11649356037378311, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10531864315271378, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10189049690961838, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08334337919950485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07737939059734344, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.05930966138839722, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05028798058629036, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04749499633908272, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.046811360865831375, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.029648728668689728, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02419649437069893, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.023884812369942665, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020908160135149956, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.020463338121771812, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015194211155176163, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014524299651384354, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013733341358602047, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00895327515900135, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013733341358602047, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00895327515900135, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2312372326850891, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21134108304977417, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20373201370239258, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.17998167872428894, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10751422494649887, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09947749972343445, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12543541193008423, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11420043557882309, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10993025451898575, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09308257699012756, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08802124857902527, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06445109844207764, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.055060215294361115, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05200360342860222, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05126874893903732, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03230832889676094, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.027509652078151703, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.027188880369067192, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024701884016394615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.024250319227576256, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017390944063663483, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0179852694272995, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.016249453648924828, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013075686059892178, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013075686059892178, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013075686059892178, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1799270510673523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.16624809801578522, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.16144411265850067, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.144907608628273, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.08467359840869904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07937128841876984, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.096576027572155, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08853884786367416, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08629666268825531, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.07472794502973557, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07066838443279266, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04956955835223198, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04270915314555168, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.0409659706056118, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04054870828986168, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.024884531274437904, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.021758511662483215, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.021579626947641373, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.019834011793136597, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01957843266427517, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013495296239852905, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.014204981736838818, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012804034166038036, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010494528338313103, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.013495296239852905, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010494528338313103, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24207760393619537, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22419093549251556, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21820774674415588, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19561775028705597, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11381657421588898, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10706418752670288, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12882480025291443, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11820831894874573, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11590762436389923, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10040050745010376, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09468857198953629, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06590419262647629, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05667748674750328, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.054698579013347626, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05422961711883545, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032984644174575806, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028327368199825287, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02812184952199459, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02562839537858963, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025326719507575035, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017532911151647568, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017464131116867065, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016700197011232376, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0118612851947546, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0118612851947546, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0118612851947546, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.24462655186653137, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.22143182158470154, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.21281103789806366, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1889033317565918, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.11307372152805328, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.10427138209342957, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1324816793203354, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.1201620027422905, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.1159958690404892, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0978684276342392, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.09259454905986786, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06777219474315643, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05766821280121803, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05445019155740738, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.0536750964820385, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.034089453518390656, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02854216657578945, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.028256546705961227, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025566812604665756, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.025073610246181488, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01840350590646267, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01829838566482067, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.017268097028136253, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012889592908322811, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012889592908322811, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012889592908322811, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10545190423727036, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.0944736897945404, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08837644010782242, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07738256454467773, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04867541044950485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04335174709558487, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06086605787277222, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05521169304847717, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05010782927274704, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04148354381322861, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03924364224076271, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.030961310490965843, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.026455137878656387, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.0235123448073864, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.022779466584324837, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.015507911331951618, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012367481365799904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011997004970908165, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01095329225063324, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.01048087514936924, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008179607801139355, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008238633163273335, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007022422272711992, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005673644598573446, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012367481365799904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005673644598573446, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08741115033626556, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07855049520730972, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07186757028102875, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06301822513341904, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04001106321811676, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03487512469291687, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.052526671439409256, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.047847095876932144, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.041218508034944534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.034389350563287735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03272107243537903, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.026577400043606758, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02281804382801056, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.019322549924254417, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01843770034611225, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013300122693181038, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010144553147256374, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009661532007157803, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009041165933012962, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008448335342109203, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006984504871070385, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006917519494891167, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005735894199460745, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004591071978211403, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.013300122693181038, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004591071978211403, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.226776584982872, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20377780497074127, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1949375420808792, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.16983133554458618, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10527817904949188, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09594414383172989, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12544552981853485, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11256343126296997, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10811278223991394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08907955139875412, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08308477699756622, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06415718793869019, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05382635071873665, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05052652582526207, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04973183944821358, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.032066330313682556, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02595764584839344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025562509894371033, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022665634751319885, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022118965163826942, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.016587968915700912, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015968678519129753, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014858261682093143, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01027701236307621, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01027701236307621, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01027701236307621, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.24386192858219147, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21738164126873016, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20703807473182678, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.17710383236408234, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.11284739524126053, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.10249453783035278, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.13519684970378876, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.12237226217985153, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11670109629631042, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.0938415676355362, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08817973732948303, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06957819312810898, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05903454124927521, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05468617379665375, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05361516401171684, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03486480563879013, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02898448333144188, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.028531836345791817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02530667930841446, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.024623075500130653, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018717925995588303, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.0191771499812603, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.017135512083768845, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013872281648218632, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013872281648218632, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013872281648218632, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16893413662910461, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15653665363788605, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15173952281475067, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13635052740573883, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07959200441837311, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07462862133979797, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.09153494238853455, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08379717171192169, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08114884048700333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0705593004822731, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06683995574712753, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.047005545347929, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.040490180253982544, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03859318047761917, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03814495727419853, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.02360178902745247, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020609350875020027, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02040712721645832, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018870236352086067, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.018594317138195038, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012848596088588238, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013631335459649563, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.012102652341127396, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010190424509346485, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012848596088588238, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010190424509346485, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23679527640342712, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22003580629825592, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21412770450115204, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19245997071266174, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11154691874980927, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10503026098012924, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1266404390335083, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11613332480192184, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11350645869970322, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09888852387666702, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09346240758895874, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0648273378610611, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05574098229408264, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05365276336669922, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.053167007863521576, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.032457683235406876, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.027909882366657257, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.027687063440680504, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.0253804512321949, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025063510984182358, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017316613346338272, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017388740554451942, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01645028218626976, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011991339735686779, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011991339735686779, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011991339735686779, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2338503748178482, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21134254336357117, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20265282690525055, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18008162081241608, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10805360227823257, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09926483780145645, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1279645562171936, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11544274538755417, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.1108260229229927, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09358091652393341, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08869179338216782, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06552539765834808, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05573694035410881, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.052337851375341415, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05146665498614311, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.033342085778713226, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027947459369897842, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02759978361427784, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.025198737159371376, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.024654008448123932, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018250199034810066, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01864461787045002, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016973363235592842, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01380125805735588, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01380125805735588, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01380125805735588, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11688606441020966, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10553498566150665, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09923914074897766, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.0874347984790802, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05412634089589119, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04860686883330345, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.0674842894077301, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06093614175915718, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05558513477444649, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.046603359282016754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04419395700097084, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.034390997141599655, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.029271699488162994, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.026205824688076973, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.025442568585276604, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01734098233282566, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013897228054702282, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013504835776984692, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012438254430890083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011948015540838242, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009245228953659534, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00935184583067894, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.008003207854926586, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0066218203864991665, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013897228054702282, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0066218203864991665, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09173539280891418, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08267379552125931, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07552454620599747, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06665422767400742, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04196898266673088, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03645262494683266, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05612366646528244, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05057084187865257, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.043199945241212845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.036441948264837265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.035088349133729935, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.028632795438170433, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02424382045865059, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020273571833968163, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019228389486670494, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014328372664749622, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010656996630132198, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010110946372151375, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0095681706443429, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.00890381820499897, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00748942606151104, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007341508287936449, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006007435265928507, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0048790075816214085, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010656996630132198, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.0048790075816214085, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2424328774213791, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22162458300590515, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21356059610843658, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.18990857899188995, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11365607380867004, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10516779124736786, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13321706652641296, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1207471489906311, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11620692163705826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09886599332094193, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09307149797677994, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06802301108837128, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05775906890630722, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05453973263502121, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05375576391816139, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0339755155146122, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.027846598997712135, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.027467504143714905, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02480456791818142, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02429143898189068, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017540989443659782, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016851413995027542, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.0160464309155941, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010577802546322346, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010577802546322346, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010577802546322346, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.22569037973880768, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2030051052570343, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19500301778316498, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.16749416291713715, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10440356284379959, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09553860872983932, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12173882126808167, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11053888499736786, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10682257264852524, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08808603882789612, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07997816056013107, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06247024983167648, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.053058650344610214, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.050251737236976624, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04956723004579544, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0312737300992012, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.026216743513941765, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.025915948674082756, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022972701117396355, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.022533146664500237, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016653209924697876, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016619719564914703, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01556583121418953, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011539077386260033, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011539077386260033, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011539077386260033, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16473104059696198, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15320749580860138, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14880681037902832, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13397230207920074, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07759351283311844, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07306106388568878, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08880379796028137, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08141081780195236, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0790238305926323, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06916041672229767, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06557134538888931, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04558207094669342, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03937593847513199, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03765855357050896, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03725454583764076, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022958295419812202, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.020160414278507233, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.019982358440756798, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018544964492321014, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.018299978226423264, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012610459700226784, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.013384747318923473, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011961212381720543, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010087709873914719, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012610459700226784, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010087709873914719, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24036554992198944, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2244625985622406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21897098422050476, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19716694951057434, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11320172995328903, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10705605149269104, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12777724862098694, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11745376884937286, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1150236502289772, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10090634971857071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09544084966182709, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06518907845020294, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05624385178089142, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05429859831929207, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05385135859251022, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03258369117975235, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02794141322374344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02772883139550686, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025440366938710213, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025142628699541092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017126135528087616, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016938824206590652, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01632820814847946, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011167516000568867, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011167516000568867, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011167516000568867, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2340066283941269, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21152937412261963, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20281291007995605, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.18043361604213715, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10769287496805191, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09891083836555481, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12671686708927155, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11509693413972855, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.1104835793375969, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09325385838747025, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08821097016334534, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06452448666095734, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05519980192184448, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.051847707480192184, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.05103496462106705, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03245129808783531, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027231669053435326, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.026907766237854958, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024411186575889587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023880956694483757, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017581865191459656, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017568662762641907, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016416065394878387, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012408615089952946, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012408615089952946, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012408615089952946, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11463482677936554, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10440149903297424, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09902822226285934, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08757127076387405, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0531439408659935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.048346467316150665, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06441572308540344, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.058681849390268326, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05439700186252594, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04605034738779068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04344931244850159, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03274355083703995, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028073536232113838, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02557670697569847, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.024966685101389885, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016405168920755386, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013314337469637394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012995347380638123, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011894594877958298, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011495672166347504, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00861064437776804, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008575492538511753, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007606297731399536, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005756329279392958, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013314337469637394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005756329279392958, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09402911365032196, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08566885441541672, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07923796027898788, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07020159065723419, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.043108098208904266, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.038287725299596786, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.055655818432569504, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.050862446427345276, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.044256288558244705, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03773054853081703, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03597257286310196, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.028087805956602097, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.024178586900234222, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020751211792230606, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019869130104780197, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01404395792633295, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010777785442769527, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010295743122696877, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009705021977424622, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009125151671469212, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007320396602153778, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007161798421293497, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006069356109946966, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004588579759001732, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01404395792633295, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004588579759001732, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.24734602868556976, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22801688313484192, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.22071489691734314, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.19710087776184082, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11601123958826065, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10828613489866257, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13498912751674652, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12199156731367111, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11836312711238861, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10182113200426102, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09625229984521866, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06897945702075958, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.058318011462688446, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05558404698967934, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.054934874176979065, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03443969041109085, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.028257781639695168, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02795557677745819, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025304339826107025, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02487931028008461, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017692020162940025, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01679464615881443, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.016289081424474716, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010291830636560917, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010291830636560917, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010291830636560917, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2362719625234604, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21637459099292755, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.20957089960575104, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.18598143756389618, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10967164486646652, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.10182181000709534, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12599512934684753, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11495630443096161, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11186471581459045, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09534391760826111, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0897727981209755, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06487563252449036, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05554110184311867, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05310669541358948, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.052515577524900436, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03283482417464256, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02826356701552868, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02800736576318741, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.025477340444922447, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.025129344314336777, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.018127568066120148, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01854592189192772, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01722830720245838, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013757303357124329, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013757303357124329, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013757303357124329, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.15397922694683075, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14336107671260834, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13908806443214417, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1254250556230545, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07245668768882751, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06819674372673035, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0832977443933487, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07633811980485916, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07377873361110687, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06469403952360153, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.061433810740709305, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04268750548362732, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03684983029961586, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.035114072263240814, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03469250723719597, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021459214389324188, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.018704280257225037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.018516071140766144, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017210448160767555, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.016961291432380676, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011683649383485317, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012325908988714218, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011024274863302708, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009170662611722946, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011683649383485317, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009170662611722946, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23408550024032593, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21903027594089508, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21362467110157013, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19268856942653656, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11018119752407074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10435480624437332, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1245575025677681, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11450068652629852, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11189127713441849, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09846831858158112, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09319698065519333, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06358326226472855, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.054824765771627426, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05286986008286476, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05240683630108833, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031788576394319534, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.027227722108364105, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.027012750506401062, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02485351450741291, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.024561483412981033, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01675461232662201, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01656581088900566, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015957867726683617, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010978441685438156, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010978441685438156, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010978441685438156, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2246115505695343, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.20198360085487366, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1928187608718872, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17143511772155762, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10301101207733154, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09395983815193176, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12205356359481812, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11113325506448746, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10595108568668365, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08882532268762589, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08420245349407196, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.062314536422491074, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05335965007543564, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.0496121384203434, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04870111122727394, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03142236918210983, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026110967621207237, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02573835849761963, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.023346122354269028, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022755328565835953, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017043720930814743, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01700069010257721, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015731148421764374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0120078818872571, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0120078818872571, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.0120078818872571, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.09605204313993454, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.08735382556915283, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08201857656240463, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.0731050968170166, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04432244598865509, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.039898600429296494, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05558447912335396, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.050550658255815506, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04557773470878601, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.038805171847343445, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.03707513213157654, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.028269091621041298, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.024208681657910347, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02138338051736355, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.020680496469140053, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014153892174363136, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.011232043616473675, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.010868494398891926, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010148350149393082, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.009691538289189339, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.007427220232784748, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.007480556145310402, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.006347471848130226, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005097568035125732, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.014153892174363136, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005097568035125732, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.08360017836093903, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.07553426921367645, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.06921795755624771, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06183791160583496, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.03804653137922287, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03338804468512535, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0504537969827652, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.04581371322274208, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.039337195456027985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03339540958404541, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.032124295830726624, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.025441940873861313, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02178470976650715, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.018357815220952034, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.017460286617279053, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012714909389615059, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.009594359435141087, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009128185920417309, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.00868113711476326, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.00810183398425579, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.006642709020525217, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.006534204818308353, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005368586629629135, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004271005280315876, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.012714909389615059, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005368586629629135, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.20766949653625488, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.18635757267475128, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.17605747282505035, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1573227345943451, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0950762927532196, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0856858566403389, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.11827825009822845, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.10607893764972687, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.09860661625862122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08279478549957275, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.07881895452737808, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06023261696100235, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05066055431962013, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04562662914395332, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04436449334025383, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.030069276690483093, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02340790629386902, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.022811653092503548, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.020902954041957855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.020082812756299973, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.015457645989954472, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.014757920987904072, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01317527424544096, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009255088865756989, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01317527424544096, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009255088865756989, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.22783780097961426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20111052691936493, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.191355362534523, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.16512538492679596, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10504302382469177, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.0942046195268631, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12471640855073929, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11336072534322739, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10848962515592575, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08779667317867279, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0809260681271553, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06422828882932663, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05476383864879608, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.051024556159973145, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05010735243558884, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032245442271232605, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02722526527941227, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.026812007650732994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023895056918263435, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02331256866455078, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01746794581413269, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018175655975937843, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.01600753143429756, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013401245698332787, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013401245698332787, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013401245698332787, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16110093891620636, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1497545838356018, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14528821408748627, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1313333511352539, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07568539679050446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07118357717990875, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0869111642241478, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07976394146680832, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07712706923484802, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06768428534269333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06422267109155655, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04451076313853264, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03844643756747246, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.036611158400774, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03617004305124283, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022376492619514465, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.01943499967455864, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01923494040966034, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017884965986013412, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.017616135999560356, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012179500423371792, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012717284262180328, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011487387120723724, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009347822517156601, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.012179500423371792, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.009347822517156601, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2408895641565323, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2253989577293396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21983346343040466, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19860084354877472, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11342930793762207, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10750017315149307, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12836211919784546, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11789726465940475, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11525744199752808, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10162972658872604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09645818173885345, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06559581309556961, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05657735466957092, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05456996709108353, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05409880355000496, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03298165276646614, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028392519801855087, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028166089206933975, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02602149359881878, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.0257275253534317, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01770433783531189, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017667783424258232, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01691303588449955, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01221267320215702, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01221267320215702, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01221267320215702, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.23682673275470734, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.21104155480861664, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.20075932145118713, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.17780379951000214, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10856086015701294, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09829577058553696, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12960998713970184, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11745453625917435, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.1119653508067131, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.09273608773946762, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08768583834171295, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06612841039896011, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05641684681177139, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.052370332181453705, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.051383644342422485, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03335169330239296, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.027683507651090622, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.027305886149406433, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.024621130898594856, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.023987140506505966, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.018176734447479248, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.018196791410446167, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.0167493037879467, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013046099804341793, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013046099804341793, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013046099804341793, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.1112285852432251, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1017991155385971, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09595004469156265, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.0855679139494896, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05162515118718147, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.046793390065431595, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06455763429403305, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.058436788618564606, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05289940908551216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04539192467927933, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04321635887026787, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03294454142451286, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0280893724411726, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.025025831535458565, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.024246688932180405, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016536682844161987, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013299633748829365, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012908986769616604, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01209031417965889, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.01161467656493187, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008815903216600418, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00901190284639597, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007626282051205635, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006434975657612085, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013299633748829365, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006434975657612085, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09442112594842911, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08491719514131546, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0768284872174263, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06881102174520493, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.042829323559999466, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03704513609409332, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05770675465464592, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.052851662039756775, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04440266266465187, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03765380010008812, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03638403117656708, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.029265977442264557, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0251235943287611, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020687729120254517, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019513551145792007, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014651418663561344, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010855183005332947, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010240798816084862, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009830204769968987, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009079566225409508, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007648375816643238, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007514560595154762, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006044426932930946, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004903338849544525, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010855183005332947, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004903338849544525, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2267690896987915, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20399154722690582, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19348397850990295, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1738225370645523, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10473071783781052, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09467390179634094, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12936806678771973, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11579488962888718, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10831989347934723, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09118601679801941, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0872938260436058, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06631579250097275, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05540227144956589, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05034656077623367, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.049078941345214844, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03317975997924805, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.025844477117061615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02525719627737999, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0230860635638237, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022261137142777443, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017135199159383774, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016218110918998718, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01475021243095398, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01022525504231453, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01022525504231453, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01022525504231453, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20203296840190887, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.17926675081253052, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1716083586215973, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.14407292008399963, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.09185882657766342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08379806578159332, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10770207643508911, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09749653190374374, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09402360767126083, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07584596425294876, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.06964778155088425, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0553269162774086, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.047269582748413086, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04471810162067413, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04409587383270264, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02800065465271473, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02424660138785839, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.023959707468748093, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.021274935454130173, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.020882654935121536, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01573128066956997, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.016562210395932198, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014764491468667984, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012816662900149822, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012816662900149822, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012816662900149822, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1531803458929062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1422896832227707, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13781681656837463, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.12439985573291779, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.071830153465271, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06741765141487122, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08273117989301682, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07608480006456375, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0732593908905983, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06416112929582596, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.060873910784721375, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.042316894978284836, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03660377860069275, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03469790890812874, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03424045443534851, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021220725029706955, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.018304359167814255, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.0180947408080101, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016805753111839294, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01652732864022255, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01141945831477642, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011855275370180607, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01069649774581194, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008536488749086857, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01141945831477642, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008536488749086857, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.23107893764972687, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.21584729850292206, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.21023976802825928, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.18991471827030182, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10856407135725021, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10268755257129669, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12326643615961075, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11329733580350876, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11039497703313828, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09721256792545319, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09205252677202225, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06285832822322845, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05428066477179527, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.052162185311317444, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.0516464002430439, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.031510353088378906, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.026968497782945633, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.026733675971627235, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024673942476511, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02435966208577156, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.016716893762350082, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016590826213359833, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015874303877353668, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011144080199301243, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011144080199301243, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011144080199301243, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22627779841423035, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.2007996290922165, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.190159872174263, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16793614625930786, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10339086502790451, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09318505972623825, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12496920675039291, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11315959692001343, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10687127709388733, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08818240463733673, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08344577997922897, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0638909861445427, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05451713129878044, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.05000552907586098, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04889531061053276, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03234436362981796, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.026660284027457237, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02621116302907467, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02376939356327057, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02306521311402321, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017776861786842346, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01792214997112751, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.016192687675356865, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013091795146465302, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013091795146465302, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.013091795146465302, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11458475887775421, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10521721094846725, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09922682493925095, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08896063268184662, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05315985530614853, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04826700687408447, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06641630083322525, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06035003066062927, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05450526252388954, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04708350449800491, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04507479816675186, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03390023857355118, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028912125155329704, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.025661198422312737, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.024852171540260315, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01699328050017357, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01349621545523405, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013078375719487667, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012299962341785431, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011800745502114296, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009005766361951828, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008982855826616287, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007764372043311596, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00618333974853158, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01349621545523405, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00618333974853158, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09367676079273224, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08524010330438614, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07689864933490753, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06907743960618973, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.042659692466259, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03694386035203934, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.058064259588718414, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.053269900381565094, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04404253512620926, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03787532448768616, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.036724042147397995, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.029418636113405228, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02526448853313923, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020627526566386223, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01937437430024147, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.014751100912690163, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010802061296999454, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010147864930331707, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009864102117717266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009078094735741615, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.00771591579541564, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0075258975848555565, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006031692959368229, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004870130214840174, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010802061296999454, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004870130214840174, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.24211911857128143, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22024257481098175, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21064721047878265, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.18916773796081543, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11234228312969208, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.1030142679810524, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13572625815868378, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12208415567874908, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11561204493045807, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09854169934988022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09384438395500183, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06936275959014893, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05838242918252945, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05387038737535477, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05274758115410805, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03468915447592735, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02750413678586483, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.026986584067344666, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02465931512415409, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.023943543434143066, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017764987424016, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01683572679758072, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015585385262966156, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010355422273278236, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010355422273278236, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010355422273278236, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.21876968443393707, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1962251514196396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18868322670459747, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1624297946691513, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10076548904180527, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09205407649278641, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.11788729578256607, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1064889058470726, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10314653813838959, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08420378714799881, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07874853163957596, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06063085421919823, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.051722750067710876, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.049117300659418106, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04850788414478302, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.030669236555695534, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.026710186153650284, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.026429718360304832, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02362251468002796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.023220065981149673, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017162472009658813, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018284011632204056, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.016163747757673264, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01423129253089428, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01423129253089428, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01423129253089428, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.14981700479984283, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13934743404388428, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13509230315685272, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.12200312316417694, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07022643834352493, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06600816547870636, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08054007589817047, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07419078797101974, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07157406955957413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0627509132027626, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.05953996255993843, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.041185442358255386, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.035649485886096954, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03386881202459335, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.033445701003074646, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020656084641814232, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.017800047993659973, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.017605433240532875, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01633208617568016, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01607060618698597, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011092313565313816, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011427505873143673, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.010424158535897732, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008144417777657509, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011092313565313816, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008144417777657509, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.22833451628684998, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2134849727153778, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.20798344910144806, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.188028946518898, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10727030783891678, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10158537328243256, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12176856398582458, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11191464215517044, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1090758666396141, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09618920832872391, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0911758616566658, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.062108200043439865, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05366356670856476, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.051578592509031296, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05108720064163208, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03116929903626442, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.026792477816343307, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.026569947600364685, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.024562915787100792, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.024258777499198914, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01672651804983616, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016656674444675446, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.015924520790576935, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01142929494380951, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01142929494380951, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01142929494380951, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2172049731016159, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19293153285980225, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18192002177238464, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16011357307434082, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09937524795532227, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08891860395669937, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12116864323616028, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10984344780445099, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10255975276231766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08448012918233871, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07978609949350357, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.0620238296687603, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05301497504115105, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04812067374587059, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.046890441328287125, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03150619566440582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02573281154036522, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02520267851650715, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02290308102965355, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022117968648672104, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01745283417403698, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.017466990277171135, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01578100584447384, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012761550955474377, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012761550955474377, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.012761550955474377, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11375096440315247, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10398829728364944, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09668609499931335, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.087134949862957, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.052526574581861496, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.046914417296648026, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06815823167562485, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.061620138585567474, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05403356999158859, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04664810001850128, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04501933977007866, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03476220741868019, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02953197807073593, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02543974667787552, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.0243928711861372, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017439093440771103, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013495536521077156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.01296303328126669, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012355415150523186, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011708395555615425, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009267143905162811, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009280912578105927, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007701391354203224, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006470272783190012, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013495536521077156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006470272783190012, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.0935555249452591, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0841430202126503, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07416889071464539, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06690432131290436, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.042337510734796524, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.035430457442998886, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.060591962188482285, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05497867986559868, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.0439857542514801, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.037493739277124405, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.036787115037441254, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.030646882951259613, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026169948279857635, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020476870238780975, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.01893613673746586, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015315190888941288, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010799030773341656, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009982150979340076, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009858286939561367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008883346803486347, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007992000319063663, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007775572594255209, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005955970846116543, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005037789233028889, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010799030773341656, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005037789233028889, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2335503250360489, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20859459042549133, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1975759118795395, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.17716127634048462, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.1068209633231163, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0962371900677681, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13234955072402954, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11847419291734695, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11090429872274399, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09289716184139252, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08870130777359009, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06792670488357544, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05663534998893738, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.051304034888744354, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04995812475681305, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.033962227404117584, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.026275694370269775, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025667177513241768, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02340278960764408, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022534042596817017, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01745893619954586, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01644773781299591, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01481605600565672, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01023779809474945, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01023779809474945, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.01023779809474945, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23656706511974335, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20287775993347168, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.19143114984035492, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1625518947839737, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10832355171442032, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09487592428922653, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12776711583137512, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11640497297048569, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11208795756101608, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08652155846357346, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08054868876934052, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06572858989238739, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.056188974529504776, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.052511245012283325, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05162156745791435, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03305448219180107, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.028020331636071205, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.027631958946585655, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02386864274740219, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02327148988842964, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017986292019486427, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.018701519817113876, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.016565794125199318, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013824041932821274, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013824041932821274, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013824041932821274, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.15460389852523804, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1443934142589569, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1403856724500656, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.12683728337287903, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07261466979980469, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06857673823833466, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0824972465634346, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07613976299762726, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07386580109596252, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.065033458173275, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06156580522656441, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.042067162692546844, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.036530788987874985, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.034950125962495804, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.034568093717098236, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.0210750550031662, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.018250487744808197, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.018073154613375664, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01674789935350418, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.016511712223291397, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011214169673621655, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01152115035802126, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01061214692890644, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008051156997680664, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011214169673621655, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008051156997680664, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2396315634250641, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.22442272305488586, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2191278338432312, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.19803276658058167, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11254726350307465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10685180872678757, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.12675026059150696, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.11685404181480408, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1142917200922966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10101404041051865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09559031575918198, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06454284489154816, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.055908188223838806, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05398061126470566, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05352848395705223, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03232317790389061, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.027779363095760345, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.027569251134991646, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.025428231805562973, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02514573186635971, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01706630550324917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.016848871484398842, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016309505328536034, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011115738190710545, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011115738190710545, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011115738190710545, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.21876615285873413, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19433461129665375, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18412241339683533, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16199110448360443, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09978073835372925, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08979804068803787, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11937549710273743, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10892356932163239, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10294561088085175, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08466565608978271, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07975641638040543, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06089682877063751, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.052234672009944916, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.048027291893959045, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04698130488395691, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.030702825635671616, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02522031031548977, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.024805869907140732, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02227928303182125, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.021617839112877846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016620583832263947, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01646331138908863, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015161597169935703, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011529238894581795, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011529238894581795, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011529238894581795, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10948239266872406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09917628765106201, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.0919652059674263, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08285583555698395, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.050316326320171356, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.044648606330156326, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06554245203733444, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05920173227787018, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05202680453658104, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.044449105858802795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.042958687990903854, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03340928256511688, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02833627536892891, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024343838915228844, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02331601455807686, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016758130863308907, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012868211604654789, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012350725010037422, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011723780073225498, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011076193302869797, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008921986445784569, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008803959004580975, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007391972001641989, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006036490201950073, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012868211604654789, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006036490201950073, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09454050660133362, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08466280996799469, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.075042225420475, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06766736507415771, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.042836129665374756, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03598407283425331, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.060868918895721436, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05496438592672348, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04459499195218086, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03787246346473694, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03714584931731224, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.030909063294529915, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026261230930685997, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02075834386050701, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019240928813815117, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015388230793178082, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010972698219120502, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010188816115260124, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01000447478145361, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009052271954715252, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008043996058404446, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007891714572906494, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006015293300151825, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005162325222045183, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010972698219120502, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005162325222045183, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23122796416282654, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2041950821876526, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19079847633838654, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.17075397074222565, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10482543706893921, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09286336600780487, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13359901309013367, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11932449042797089, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10954272747039795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09049632400274277, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08672061562538147, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06860718876123428, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.056946709752082825, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.050296321511268616, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.048614948987960815, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.034383323043584824, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.025831667706370354, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02503049746155739, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022919688373804092, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.021798888221383095, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017682775855064392, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.0164678692817688, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014481029473245144, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010163587518036366, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010163587518036366, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010163587518036366, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.24541179835796356, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.21926794946193695, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.21018758416175842, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.17703887820243835, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.11297047138214111, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.10291077196598053, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1303188055753708, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11973392963409424, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11562351137399673, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.09355804324150085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08424682170152664, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.0666874498128891, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0574541874229908, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.054328612983226776, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05357683077454567, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03344740346074104, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02844870276749134, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.028110289946198463, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.024638811126351357, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.024153664708137512, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01790463924407959, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01817512884736061, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.016678864136338234, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012762627564370632, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012762627564370632, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012762627564370632, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.15309381484985352, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14319689571857452, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13948947191238403, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.12611590325832367, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07193414866924286, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06806962192058563, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08138207346200943, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07510258257389069, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0730992928147316, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06449579447507858, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.061104852706193924, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04147607833147049, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03602824732661247, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03460016846656799, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03426545485854149, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020785441622138023, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.018023449927568436, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.017866095528006554, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016541093587875366, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01632891595363617, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011073756031692028, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01128416322171688, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01053536869585514, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007831688970327377, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011073756031692028, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007831688970327377, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24989371001720428, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23446440696716309, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22921410202980042, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20717982947826385, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11751438677310944, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11171974241733551, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13174423575401306, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12156923115253448, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11924088001251221, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10557062178850174, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09986161440610886, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06713473796844482, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05820078030228615, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.0563758909702301, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.055936481803655624, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03358772397041321, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.029023831710219383, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028827356174588203, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026594597846269608, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.026318848133087158, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017723169177770615, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017598243430256844, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016992943361401558, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011662546545267105, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011662546545267105, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011662546545267105, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.21865877509117126, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19411034882068634, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18368180096149445, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.162092924118042, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09946723282337189, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08939892053604126, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.1192312091588974, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10895487666130066, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10267051309347153, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08450396358966827, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07983507215976715, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06045815348625183, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.052112944424152374, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04780927300453186, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04674316942691803, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0304754339158535, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02500195801258087, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.024561354890465736, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022066857665777206, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02137831784784794, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016431771218776703, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01619981974363327, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014948364347219467, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01113811694085598, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01113811694085598, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01113811694085598, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10934126377105713, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09862653911113739, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09110220521688461, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08201419562101364, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05018432438373566, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04419473931193352, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06570220738649368, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.05951470136642456, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05198250338435173, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04420603811740875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.042821746319532394, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.033556461334228516, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028522640466690063, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024287749081850052, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02317790687084198, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016865838319063187, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012841582298278809, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012296847067773342, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.01167976576834917, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010997314006090164, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008987904526293278, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008842049166560173, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007391474209725857, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006064613349735737, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012841582298278809, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006064613349735737, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09998158365488052, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08946298807859421, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07919862121343613, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07139478623867035, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.045345429331064224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.038019999861717224, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.0641988143324852, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.058266300708055496, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04721870645880699, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.039972733706235886, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.039135802537202835, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.032557424157857895, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027831649407744408, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.021995222195982933, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.02038855291903019, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016312573105096817, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01163749024271965, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010818708688020706, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01060370821505785, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009599976241588593, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008547759614884853, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008391369134187698, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006447238847613335, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005551087204366922, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01163749024271965, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005551087204366922, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2245853841304779, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.19666093587875366, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18231861293315887, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.16312378644943237, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.1012648344039917, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.08865446597337723, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13134276866912842, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11708959937095642, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10621795058250427, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08701428025960922, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08360868692398071, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06709225475788116, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.055800579488277435, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04863051697611809, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.046790022403001785, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03350359946489334, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.024982253089547157, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02412496879696846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.022101012989878654, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.020896144211292267, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.017190054059028625, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016144217923283577, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013912402093410492, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010000712238252163, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.013912402093410492, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010000712238252163, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23112308979034424, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19646090269088745, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18377290666103363, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.14437924325466156, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.1063389927148819, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09248358011245728, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12647292017936707, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11534196138381958, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10998132824897766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07916367053985596, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0718030259013176, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06467729806900024, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.055556654930114746, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05154702439904213, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05055255442857742, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.032498035579919815, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.027393601834774017, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.026942335069179535, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022215742617845535, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02154030278325081, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017626745626330376, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01821022666990757, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.016050288453698158, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013295468874275684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013295468874275684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.013295468874275684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16606657207012177, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15528970956802368, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15131627023220062, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13671395182609558, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07802040874958038, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07382229715585709, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.0880865603685379, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08140680938959122, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07927592098712921, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0699070394039154, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06615785509347916, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04488653317093849, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.038992155343294144, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03746333345770836, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03710044175386429, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022437678650021553, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.019384261220693588, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.0192100927233696, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01775382086634636, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.017526697367429733, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011818666011095047, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011954612098634243, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011227940209209919, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00806997437030077, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011818666011095047, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00806997437030077, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.25490713119506836, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.23904511332511902, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23357068002223969, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21112443506717682, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11985232681035995, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11389605700969696, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13417379558086395, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12401126325130463, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12164090573787689, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10758639127016068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10172127187252045, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06833264231681824, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.059288881719112396, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.057415854185819626, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05696644261479378, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.034132905304431915, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.029423072934150696, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02921414189040661, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.026909274980425835, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02663125842809677, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01782909221947193, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.0176400076597929, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017080320045351982, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01141519844532013, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01141519844532013, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01141519844532013, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2244676649570465, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1992703676223755, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1886061131954193, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16720445454120636, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10211281478404999, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09189160168170929, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12273481488227844, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11184068769216537, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10548091679811478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08709097653627396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08255920559167862, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.062192682176828384, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05349560081958771, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04909571632742882, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.048003219068050385, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.031354136765003204, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.025678779929876328, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.025232912972569466, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022732071578502655, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.022026468068361282, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.017017072066664696, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016643404960632324, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.01550678163766861, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011454831808805466, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011454831808805466, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011454831808805466, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.1113843321800232, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10094909369945526, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09372924268245697, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.084358349442482, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.051164641976356506, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.045430950820446014, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06626525521278381, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060100238770246506, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05287951976060867, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04518897831439972, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04358413442969322, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.033793482929468155, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.02873355895280838, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02471177466213703, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.023681094869971275, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016931619495153427, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012981800362467766, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012471655383706093, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011808456853032112, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011163895949721336, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008970743976533413, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008791070431470871, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007453522179275751, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00593013409525156, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012981800362467766, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00593013409525156, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09882045537233353, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08812365680932999, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07916464656591415, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07131649553775787, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.044681254774332047, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.0381447896361351, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06109074875712395, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.055823683738708496, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04662487283349037, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03929606080055237, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.038217999041080475, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03099595196545124, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.026591649278998375, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.021587258204817772, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.020272579044103622, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015537712723016739, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011369532905519009, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01069498062133789, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010315715335309505, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009481308981776237, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008093636482954025, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00797706376761198, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006274905055761337, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005245898384600878, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011369532905519009, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005245898384600878, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23729035258293152, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21218138933181763, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.20070169866085052, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.17987273633480072, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10848301649093628, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09763402491807938, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1336076557636261, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1207122653722763, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11252839863300323, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09433445334434509, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08989011496305466, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06817460805177689, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05767429992556572, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.052024856209754944, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05064947158098221, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03407193720340729, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.026649324223399162, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025983605533838272, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023755915462970734, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022829994559288025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0174709502607584, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.016691863536834717, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014915475621819496, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010317830368876457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010317830368876457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010317830368876457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.22592759132385254, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1937749981880188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1828683465719223, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.14411817491054535, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.1027388870716095, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09020771831274033, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1211356371641159, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11097921431064606, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10659011453390121, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07919993251562119, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07068780064582825, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06195659190416336, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05336736887693405, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04965805634856224, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.048748794943094254, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03105812333524227, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.026165606454014778, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.025768712162971497, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02160927653312683, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.020981401205062866, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016671275720000267, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01709567941725254, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015205918811261654, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01216040551662445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01216040551662445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01216040551662445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1694028526544571, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15849679708480835, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15446321666240692, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13942530751228333, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07960492372512817, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07533399760723114, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08998075127601624, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08296818286180496, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.08087142556905746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0713014230132103, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06749704480171204, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04578962177038193, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03977208212018013, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03824770078063011, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03788066282868385, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022964565083384514, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.019848128780722618, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01967538334429264, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.018180735409259796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01795097067952156, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.0122522646561265, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012308496050536633, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011677433736622334, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008416211232542992, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.0122522646561265, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008416211232542992, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.25973206758499146, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24350540339946747, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2379559874534607, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21497276425361633, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12217876315116882, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11602863669395447, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13677628338336945, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12638480961322784, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12399452179670334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10958360880613327, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10360274463891983, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06967566162347794, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06048381328582764, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.058566827327013016, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05811845511198044, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03483577445149422, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.030054595321416855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.029851676896214485, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02748596854507923, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.027199842035770416, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.01827526092529297, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018092036247253418, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017500953748822212, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011808933690190315, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011808933690190315, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011808933690190315, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22571855783462524, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1998729109764099, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18878313899040222, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16762226819992065, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.1025395542383194, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09192655980587006, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12309098988771439, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11290194094181061, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10612395405769348, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.0874050185084343, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08309890329837799, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06265991181135178, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05404505878686905, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04929957538843155, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04814086854457855, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.031511735171079636, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.025795118883252144, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.02532636933028698, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022825568914413452, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02207871526479721, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016875650733709335, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01676960289478302, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015218392945826054, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011527911759912968, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011527911759912968, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011527911759912968, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11775317788124084, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10710682719945908, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09947048872709274, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08969676494598389, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05433756858110428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.048287466168403625, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06995949894189835, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06375189870595932, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05599304661154747, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04804569482803345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04639095067977905, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03566858544945717, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030545037239789963, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.026273401454091072, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.025183312594890594, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017852209508419037, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01387082226574421, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.01331464946269989, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012641831301152706, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011963510885834694, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009408745914697647, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009460864588618279, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007837572135031223, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006481700576841831, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01387082226574421, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006481700576841831, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10605543851852417, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09559192508459091, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08575449883937836, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07739206403493881, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.048383474349975586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.04120159149169922, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06696605682373047, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06092795357108116, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.05016028881072998, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04282401129603386, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.04180614650249481, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03399185836315155, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.029159322381019592, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.023392291739583015, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.021818112581968307, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0170139167457819, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01224772073328495, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01143023930490017, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011156612075865269, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.010176246054470539, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008875234983861446, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008604055270552635, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006823744159191847, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005507417023181915, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01224772073328495, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005507417023181915, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2511994242668152, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22695989906787872, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21679814159870148, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.19461701810359955, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11581305414438248, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10572436451911926, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13994120061397552, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12645721435546875, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11954429000616074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10131590068340302, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09638075530529022, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07125921547412872, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06039831414818764, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05549084395170212, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05428949370980263, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.035590894520282745, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.028355050832033157, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.027804220095276833, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025387978181242943, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02459804341197014, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018249213695526123, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017441265285015106, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.01596089079976082, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010768703185021877, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010768703185021877, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010768703185021877, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.2196550965309143, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19181422889232635, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.18017318844795227, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1519293338060379, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10125073045492172, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08912279456853867, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12139302492141724, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11117102950811386, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10412497073411942, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08166001737117767, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07454518228769302, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06202907860279083, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05331847444176674, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04878664389252663, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04767630621790886, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0310605950653553, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.025484001263976097, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.024921435862779617, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.02159830369055271, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.020832469686865807, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016500625759363174, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01648792065680027, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014750901609659195, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011244484223425388, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011244484223425388, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011244484223425388, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1651759147644043, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1545066088438034, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.15051640570163727, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13582003116607666, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07765650749206543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07346751540899277, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08764354139566422, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08097440749406815, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0789172500371933, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06951651722192764, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06575696915388107, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04464681074023247, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03881324455142021, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03731844574213028, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03696700930595398, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022339235991239548, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.019357893615961075, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01919194869697094, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.01771712861955166, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01749775931239128, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011812829412519932, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011993485502898693, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011244773864746094, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008184739388525486, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011812829412519932, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008184739388525486, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.26269638538360596, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2462557554244995, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.24073337018489838, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.2173796147108078, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12365727871656418, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.1174430325627327, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13877281546592712, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12785425782203674, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12546013295650482, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11083230376243591, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10483627766370773, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07061503827571869, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.061256397515535355, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.05935079604387283, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05889127030968666, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.035394079983234406, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.030594833195209503, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.030383877456188202, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027992112562060356, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.027712395414710045, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018730025738477707, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018616609275341034, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017972605302929878, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012419324368238449, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012419324368238449, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.012419324368238449, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.22503845393657684, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1990056186914444, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18752458691596985, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16745534539222717, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10209288448095322, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09125837683677673, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12344364821910858, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11290083825588226, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.105677530169487, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08712887763977051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08320289105176926, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06254546344280243, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05396713316440582, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04906833916902542, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04784820228815079, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03152688965201378, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.025607971474528313, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.025102144107222557, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02266163006424904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.021866733208298683, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01698889024555683, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016611523926258087, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015328224748373032, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011265980079770088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011265980079770088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011265980079770088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.12331859022378922, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.11244936287403107, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10474727302789688, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.09444329142570496, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05700401961803436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.05087482929229736, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.07292665541172028, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.0664936900138855, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05866166576743126, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.050431907176971436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04862096905708313, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.037141673266887665, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.031799010932445526, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02751261368393898, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02642097696661949, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.0186044629663229, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.014406860806047916, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013839928433299065, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013109983876347542, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.012417570687830448, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.00976312905550003, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009662306867539883, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.008159885182976723, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006443103309720755, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013839928433299065, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006443103309720755, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10678382962942123, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09661346673965454, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08704401552677155, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07845749706029892, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.048848122358322144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.04186127334833145, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06655161827802658, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.06111176684498787, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.05050213262438774, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04325992614030838, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.04200943931937218, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03370225802063942, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.029175003990530968, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.0235814917832613, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.02208021469414234, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016845086589455605, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.012312018312513828, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.011519437655806541, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.011207204312086105, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.010258357971906662, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008752081543207169, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008564835414290428, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006776498630642891, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005438108928501606, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.012312018312513828, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005438108928501606, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.25270211696624756, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2289906144142151, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21857000887393951, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.19631624221801758, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11681672930717468, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10664067417383194, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.14158794283866882, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12766070663928986, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.12027854472398758, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10240791738033295, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09741142392158508, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07220900803804398, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06094631925225258, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05599960312247276, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05478125065565109, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03608003631234169, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.028594352304935455, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.027997996658086777, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02563267946243286, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.024835532531142235, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018514864146709442, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01757827401161194, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.016169629991054535, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010801100172102451, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010801100172102451, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010801100172102451, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23591801524162292, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.20024843513965607, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1874205619096756, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15636271238327026, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10826607048511505, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09322773665189743, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12903012335300446, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.1173807755112648, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.11249015480279922, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08453106880187988, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07643692195415497, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06590570509433746, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.056366510689258575, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05217406526207924, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05113896727561951, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03305423632264137, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02714795432984829, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.02666715905070305, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022412583231925964, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.021694406867027283, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.017676260322332382, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.017322003841400146, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015910547226667404, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011728344485163689, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011728344485163689, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011728344485163689, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1645299345254898, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.1537371277809143, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14980526268482208, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.13512495160102844, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07743149995803833, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07321065664291382, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08742374926805496, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.0807400569319725, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07867598533630371, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06922418624162674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0654369443655014, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.044612836092710495, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03871629387140274, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.037209540605545044, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03685880824923515, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022368911653757095, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.01932843215763569, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01916181854903698, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017685748636722565, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01746439002454281, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011935535818338394, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01201000064611435, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011365555226802826, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008231584914028645, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011935535818338394, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.008231584914028645, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.2627090811729431, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.24614855647087097, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.24054647982120514, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.21708638966083527, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12377773970365524, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11749164015054703, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.1383809745311737, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12801983952522278, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12561547756195068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.11082227528095245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10463344305753708, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0704418197274208, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.061206236481666565, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.0592910498380661, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05883602052927017, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03517308831214905, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.03032495081424713, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.030117684975266457, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.027671780437231064, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.027379624545574188, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018255911767482758, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.018101641908288002, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.017466258257627487, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0116015849635005, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0116015849635005, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.0116015849635005, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2242383062839508, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.19760745763778687, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.18550628423690796, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16634495556354523, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.10152561217546463, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.09025300294160843, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12345626205205917, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11324469745159149, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10531766712665558, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08681416511535645, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08315447717905045, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06278876960277557, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.054157182574272156, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04880310595035553, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04747477546334267, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.03146188333630562, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02553103305399418, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.024976061657071114, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02265123464167118, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.021790722385048866, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01677515171468258, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.016710331663489342, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014934013597667217, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01139215286821127, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01139215286821127, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01139215286821127, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11646072566509247, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10560376942157745, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09752368181943893, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08788613975048065, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.053671810775995255, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04735079035162926, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06993885338306427, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06370746344327927, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05544379726052284, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04742972552776337, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04584714397788048, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.035631678998470306, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.030533187091350555, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.025951746851205826, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02476791851222515, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017833156511187553, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.0136681217700243, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013050382025539875, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012438999488949776, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011687548831105232, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009317372925579548, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009351572953164577, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007599800359457731, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006292268633842468, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.0136681217700243, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006292268633842468, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10170433670282364, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09142495691776276, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08083891868591309, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07296139001846313, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04624919220805168, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03876060992479324, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06568419933319092, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05978835001587868, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04805174097418785, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.04089592769742012, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0400228276848793, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03321424499154091, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02847689390182495, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.02238748036324978, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.020708974450826645, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016628235578536987, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011791152879595757, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010908691212534904, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010740693658590317, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009690960869193077, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008730464614927769, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008451178669929504, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006558520719408989, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005448156502097845, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011791152879595757, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005448156502097845, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.24302905797958374, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.21621423959732056, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.2033921778202057, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1821472942829132, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11086394637823105, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09891916811466217, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1396806836128235, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12505944073200226, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.1153680682182312, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09616787731647491, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09224458038806915, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07149595767259598, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05973879620432854, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05318176746368408, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05155399441719055, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.035775281488895416, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.027246762067079544, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.02648170292377472, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02424943447113037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.02316996082663536, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018391041085124016, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01721402443945408, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.015283391810953617, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010594976134598255, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010594976134598255, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010594976134598255, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.23949560523033142, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.2082698941230774, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1979004144668579, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1692279428243637, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.11032594740390778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.09799888730049133, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12806186079978943, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11776501685380936, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.1138443723320961, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08902769535779953, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.08228116482496262, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06548053026199341, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05644451826810837, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05302724614739418, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.05220208689570427, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03271739184856415, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.027492757886648178, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.027110254392027855, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.023245831951498985, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.022679124027490616, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.01724063605070114, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01725577749311924, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015879204496741295, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011587020009756088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011587020009756088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.011587020009756088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.16479617357254028, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.15393948554992676, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1499817818403244, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1352478563785553, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07757207751274109, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.07330835610628128, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08760195225477219, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.08091163635253906, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07884103059768677, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06931547820568085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06547915935516357, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04460972920060158, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03877931088209152, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.037261586636304855, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03690138831734657, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.022315360605716705, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.019281357526779175, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.019114317372441292, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017615703865885735, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.017389357089996338, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011743983253836632, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011880343779921532, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.011161419562995434, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00802200473845005, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011743983253836632, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00802200473845005, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.25984299182891846, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2433728575706482, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.23778656125068665, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.2145136296749115, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.12248838692903519, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.11619514971971512, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.137369766831398, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.12678876519203186, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.12434163689613342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10961087793111801, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.10359666496515274, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.07001889497041702, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.06066525727510452, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.058736417442560196, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.058278728276491165, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03501593694090843, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.03014792501926422, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.029935197904706, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02751421369612217, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.02721993811428547, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.018423741683363914, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01815345697104931, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01764664240181446, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011827846989035606, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011827846989035606, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011827846989035606, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.21875159442424774, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.1915513128042221, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.17880968749523163, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.16069340705871582, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09896805137395859, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08709858357906342, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12145717442035675, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11149764806032181, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10298440605401993, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08434291183948517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.08110395073890686, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.061878807842731476, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05352238565683365, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.047725144773721695, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.0462653711438179, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.031296420842409134, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.025164877995848656, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.024558065459132195, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.022329965606331825, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.02139580063521862, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016972031444311142, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01680806092917919, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.015030192211270332, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011706698685884476, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011706698685884476, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011706698685884476, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11891800165176392, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10791868716478348, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.10094618797302246, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.09103403985500336, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05489147827029228, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04912623018026352, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06952211260795593, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06325574964284897, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05664467439055443, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04845728352665901, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04653993248939514, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.035500992089509964, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0303261186927557, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.026513447985053062, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.025542937219142914, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017776252701878548, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01393579039722681, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.013433195650577545, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012669514864683151, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.01206235121935606, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009354844689369202, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.009352252818644047, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007867705076932907, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006362289190292358, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.01393579039722681, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006362289190292358, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.10305760055780411, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.09366384893655777, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.08665230870246887, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.07797417789697647, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04734991118311882, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.04191996157169342, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06162560358643532, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05615495517849922, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04880363494157791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.041901107877492905, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.04033980518579483, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.031200813129544258, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.0268451739102602, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.022841528058052063, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.021836690604686737, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015633894130587578, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011971009895205498, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.011430664919316769, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.01088880468159914, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.010237591341137886, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008186320774257183, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00810052640736103, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.006710641551762819, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005346706602722406, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011971009895205498, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005346706602722406, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.225119948387146, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.203887477517128, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.19449101388454437, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1747869998216629, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10391395539045334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09481557458639145, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.12513433396816254, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.11387424170970917, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10717540234327316, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09108497947454453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08663944900035858, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.06370896846055984, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05438893288373947, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.04982806369662285, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04866984859108925, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03181305155158043, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.025410475209355354, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.0248658899217844, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02275564894080162, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022002629935741425, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01633492484688759, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.015611587092280388, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014414174482226372, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009515007957816124, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009515007957816124, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.009515007957816124, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.20402687788009644, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.18035456538200378, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1720762848854065, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.1467176228761673, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.0942075252532959, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.08481074869632721, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.1099303737282753, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.10073848813772202, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.09731832146644592, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07804958522319794, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.0708785280585289, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.056375060230493546, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.048446644097566605, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04545600712299347, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04474303871393204, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02835174836218357, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.023916998878121376, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.023606475442647934, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.020746219903230667, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.020290063694119453, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.015273317694664001, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.015473481267690659, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014087243936955929, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010988298803567886, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.014087243936955929, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.010988298803567886, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1573924422264099, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.14681512117385864, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.14282512664794922, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.12868550419807434, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.07408922910690308, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06988988816738129, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.08396704494953156, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07751531898975372, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07536454498767853, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06610214710235596, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.06248852238059044, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.04285721108317375, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03718166425824165, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.035632289946079254, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.035259976983070374, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.021447641775012016, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.018497364595532417, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.018324758857488632, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016893602907657623, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.016664568334817886, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011375685222446918, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.011504020541906357, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.010784603655338287, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007866401225328445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011375685222446918, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007866401225328445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.24599076807498932, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.2301432490348816, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.22470344603061676, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.20256417989730835, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.11592917889356613, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.10989350825548172, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.13007493317127228, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.1201862245798111, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.11776775121688843, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.10361930727958679, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.09783567488193512, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.06632637232542038, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.057531148195266724, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.055607039481401443, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.05516120418906212, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.033137544989585876, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.028572652488946915, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.028355546295642853, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.02605769969522953, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.025761794298887253, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.017350731417536736, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017241885885596275, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.016566812992095947, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011270293965935707, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011270293965935707, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.011270293965935707, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.2164514660835266, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.18768525123596191, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1742873191833496, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.1569552719593048, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.09765210002660751, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.08508038520812988, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.12154501676559448, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.11059021949768066, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.10189158469438553, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.08259154856204987, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07962769269943237, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.06171523779630661, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.05301041528582573, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.04711642861366272, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04564374312758446, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.0311576034873724, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.024892741814255714, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.024277692660689354, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.021977992728352547, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.021026719361543655, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.016939878463745117, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01670794188976288, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014932462945580482, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011684585362672806, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011684585362672806, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011684585362672806, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.11123762279748917, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.10087183862924576, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09204564988613129, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08297675102949142, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.05119120702147484, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.044540584087371826, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06911851465702057, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.06245901435613632, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.052914585918188095, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.04535652697086334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04414970427751541, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03531169891357422, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.0299568772315979, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.024813232943415642, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02345617115497589, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017677638679742813, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013148531317710876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012444392777979374, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.012012792751193047, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.011163918301463127, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009297933429479599, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.00922016054391861, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007349675055593252, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006253024097532034, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.013148531317710876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.006253024097532034, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09476888179779053, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08499323576688766, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07314590364694595, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06617461889982224, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.042865633964538574, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03487101197242737, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06404999643564224, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.057986486703157425, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04468008130788803, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03809085488319397, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03767582029104233, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03229008987545967, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027594003826379776, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020833617076277733, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.018888473510742188, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.016233904287219048, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011059909127652645, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.01004776544868946, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010141009464859962, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008944476023316383, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008456622250378132, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008228139951825142, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005980828311294317, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005344053730368614, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.011059909127652645, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005344053730368614, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2351466566324234, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.2087089568376541, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.1948813945055008, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.17467643320560455, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10724791884422302, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09482964128255844, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13737979531288147, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12293543666601181, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11177113652229309, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09301875531673431, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08923955261707306, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07044670730829239, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.058822717517614365, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.051549606025218964, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.04972568899393082, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.03536960855126381, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.026662085205316544, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025751084089279175, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.023791972547769547, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.022595757618546486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.01820676028728485, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017307311296463013, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014847258105874062, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011002825573086739, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011002825573086739, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011002825573086739, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.22842472791671753, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.19870957732200623, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.1883302927017212, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.15808698534965515, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.10470040142536163, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.092855304479599, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.12292639911174774, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.11248790472745895, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.10836616158485413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.08465892821550369, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.07639153301715851, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.06301645189523697, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.05405711010098457, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.05053366348147392, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.049673452973365784, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.03154868260025978, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.026560328900814056, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.026156678795814514, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.022618070244789124, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.02204485982656479, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.016869064420461655, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01719994843006134, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.015432109124958515, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012157278135418892, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012157278135418892, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.012157278135418892, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1440422236919403, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13407257199287415, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1301049143075943, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.11724818497896194, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06774120032787323, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06370150297880173, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07729510217905045, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07134748250246048, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.06900632381439209, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06037885695695877, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.05710461363196373, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.0394897535443306, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03423343598842621, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.032604970037937164, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.032214511185884476, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01976281777024269, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.016951048746705055, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01676618680357933, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.015472158789634705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.015226335264742374, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010487021878361702, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.010612028650939465, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.009867941960692406, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007273644674569368, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010487021878361702, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007273644674569368, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.22229990363121033, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.20744270086288452, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.2021285593509674, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.1822126805782318, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.10468762367963791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.09894111752510071, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.11814974248409271, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.10903473943471909, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.1064239963889122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.09346412122249603, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.08824753761291504, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.060283783823251724, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.05224338173866272, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.050281964242458344, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.04981766268610954, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.03017723187804222, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.025977201759815216, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.025759322568774223, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.0236973837018013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.023408107459545135, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015949010848999023, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.015938054770231247, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.01515905000269413, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01070403028279543, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01070403028279543, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.01070403028279543, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.1957320272922516, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.16878339648246765, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1561015099287033, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.13913540542125702, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.0884169489145279, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.0765000730752945, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.11006799340248108, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.10052885860204697, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.09229770302772522, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.07381269335746765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07087235152721405, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05632918328046799, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.0483657568693161, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.042775582522153854, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.041376203298568726, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.028614990413188934, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.022806808352470398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.022219926118850708, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.02001940831542015, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.01911179907619953, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.015882126986980438, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015585126355290413, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014021546579897404, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01115840207785368, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.014021546579897404, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.01115840207785368, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10899388790130615, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09883172065019608, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.09064091742038727, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.08178400248289108, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04996968433260918, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.043812669813632965, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06777505576610565, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060410890728235245, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.05179280787706375, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.044387515634298325, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04329433664679527, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03458582982420921, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028943276032805443, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.02417900413274765, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02292877994477749, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.017328469082713127, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012751649133861065, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.012114161625504494, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011657783761620522, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010879823006689548, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009081301279366016, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008836058899760246, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.007166413124650717, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005933344829827547, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012751649133861065, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.005933344829827547, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09244661033153534, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08429941534996033, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07560201734304428, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06817734241485596, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04227285087108612, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03627639636397362, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.05921200290322304, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05367133021354675, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04368579015135765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.037767380475997925, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.0368349589407444, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.030056100338697433, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02554303966462612, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020465420559048653, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.019067808985710144, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015037629753351212, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010770967230200768, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.010027037002146244, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0098881172016263, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.009007974527776241, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.007821726612746716, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.007638924289494753, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.00592348538339138, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004945980850607157, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010770967230200768, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.004945980850607157, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.2505604922771454, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.22848643362522125, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.21817390620708466, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.19651496410369873, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.11617226898670197, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.10652188211679459, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.1407807618379593, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.1272830367088318, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.11963655799627304, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.10259450227022171, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.09788148105144501, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0719316303730011, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.06094520539045334, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.055756956338882446, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.05446682497859001, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.0361257940530777, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.028582092374563217, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.027952156960964203, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.025811156257987022, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.024954665452241898, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.0185554102063179, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017771223559975624, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.016142673790454865, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011095632798969746, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011095632798969746, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011095632798969746, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.1903042197227478, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.16215632855892181, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15288373827934265, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.13161469995975494, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.08734235912561417, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.07583972811698914, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.10308310389518738, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.09271828830242157, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.0904805064201355, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.07006245851516724, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.06326759606599808, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.052852753549814224, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.044506389647722244, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.04206017404794693, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04147644713521004, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.02647283487021923, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.02197587862610817, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.021733101457357407, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.018566425889730453, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.018158769235014915, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014066314324736595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.013998331502079964, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.012945519760251045, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009752129204571247, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.014066314324736595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.009752129204571247, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1400197297334671, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13013747334480286, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12610989809036255, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.11364743858575821, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06575994193553925, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06172385811805725, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07522831112146378, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06940711289644241, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.06700076162815094, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.05855134129524231, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.05541025102138519, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03842008858919144, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.03332730382680893, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.031675662845373154, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03128248080611229, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01923726499080658, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.016557443886995316, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.016366485506296158, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.015129083767533302, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.014875931665301323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010256308130919933, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.01050969772040844, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.009628744795918465, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007344333454966545, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.010256308130919933, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.007344333454966545, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1924734115600586, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.17943283915519714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.17459847033023834, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.15735386312007904, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.09119374305009842, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.08607754111289978, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.10326989740133286, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.09526881575584412, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.0927555188536644, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.08144769817590714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.07715186476707458, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.053308822214603424, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.04656800627708435, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.044755686074495316, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.044343315064907074, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.026847492903470993, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.02472885325551033, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.02454867586493492, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.022949829697608948, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.022705497220158577, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.015134532004594803, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.017293939366936684, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.014475883916020393, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013923056423664093, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013923056423664093, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.013923056423664093, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.19586560130119324, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.17014944553375244, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.1588457077741623, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.14071065187454224, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.08900941908359528, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.07802805304527283, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.10911199450492859, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.09925886243581772, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.09237047284841537, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.074189692735672, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.07053238898515701, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.05530805140733719, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.04759347066283226, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.0430053249001503, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.04187453165650368, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.027858451008796692, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.02282463200390339, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.022351263090968132, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0199811439961195, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.019234756007790565, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.01505228690803051, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.015313236974179745, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.013448064215481281, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010938561521470547, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.013448064215481281, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.010938561521470547, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10513037443161011, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09495238959789276, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08541178703308105, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07704246789216995, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.048205383121967316, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.041221387684345245, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06699895858764648, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.060364387929439545, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04997512325644493, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.042638927698135376, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.04170689359307289, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.034187979996204376, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.028951726853847504, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.023420052602887154, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.021935569122433662, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01714448258280754, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012485790997743607, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011717033572494984, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.011407364159822464, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010481949895620346, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.009050275199115276, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008952680975198746, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.006961435079574585, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00610083295032382, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012485790997743607, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.00610083295032382, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09352144598960876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.08391358703374863, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.07179859280586243, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06488247960805893, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.0422406829893589, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.034114982932806015, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06411626935005188, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05772720277309418, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.044065628200769424, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.03761047124862671, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.037359364330768585, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.03244812786579132, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.027542462572455406, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020584886893630028, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.018544290214776993, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.01634318381547928, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010938269086182117, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.009890125133097172, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.010044709779322147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.00880380067974329, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008498931303620338, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.008221893571317196, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.00589245418086648, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00534438481554389, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.010938269086182117, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.00534438481554389, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23076178133487701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20360000431537628, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18843616545200348, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.1690351665019989, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10447748005390167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09146842360496521, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13662110269069672, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12217318266630173, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.10945513099431992, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.09060922265052795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08757515251636505, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07036561518907547, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05836081504821777, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.05034578591585159, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.048251066356897354, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.035463713109493256, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.026132656261324883, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025122739374637604, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.0233440063893795, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.021988291293382645, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018329758197069168, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.017298296093940735, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014580254442989826, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011083714663982391, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011083714663982391, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.011083714663982391, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.18348678946495056, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.1603163778781891, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.15292717516422272, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.13147151470184326, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.08457249402999878, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.07598527520895004, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.097690649330616, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.08899751305580139, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.08680932968854904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.06845369935035706, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.06429976224899292, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.05016889050602913, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.042894527316093445, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.0408761166036129, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.04039962217211723, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.025137940421700478, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.021652361378073692, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.021429574117064476, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.018563248217105865, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.01824028044939041, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.013615550473332405, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.014135072007775307, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.012799463234841824, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01034840289503336, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.013615550473332405, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01034840289503336, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.14086562395095825, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13117238879203796, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.12730573117733002, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.11479366570711136, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06665775179862976, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06275490671396255, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07600684463977814, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07010272145271301, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.06783374398946762, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.0594879612326622, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.05642576888203621, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.039284780621528625, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.034263111650943756, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.032735615968704224, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03236943110823631, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.019830966368317604, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.01807241514325142, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.017913885414600372, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.016761669889092445, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.016551177948713303, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011208342388272285, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012657607905566692, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.010652175173163414, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010161350481212139, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011208342388272285, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010161350481212139, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.14628541469573975, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.13638190925121307, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.13271945714950562, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.11970991641283035, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06928086280822754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.06539839506149292, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07856348156929016, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.07235943526029587, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.07044404000043869, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.06185866519808769, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.0585683137178421, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.040575504302978516, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.035263270139694214, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.03387311473488808, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.03354310616850853, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.020448870956897736, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.018527628853917122, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01837480254471302, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.017149847000837326, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.01695152558386326, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011489499360322952, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.012753704562783241, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.010982945561408997, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010074432939291, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.011489499360322952, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.010074432939291, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.15944169461727142, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.13945923745632172, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.13071022927761078, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.11393113434314728, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.0731801837682724, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.0648106038570404, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.08996456116437912, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.08017608523368835, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.07525254040956497, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.06037183851003647, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.05716918781399727, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.045350756496191025, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.03897375240921974, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.035917382687330246, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.03514713793992996, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.023201286792755127, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.019893864169716835, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.019557245075702667, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.017589382827281952, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.017101040109992027, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.013362758792936802, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.01418045163154602, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.012356890365481377, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011209049262106419, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.013362758792936802, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.011209049262106419, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.q_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10189735889434814, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09193126857280731, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08285598456859589, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07460939884185791, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.046628355979919434, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.03995862603187561, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.06451524794101715, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.058256398886442184, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.04837160184979439, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.041178394109010696, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.040245626121759415, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.03291882947087288, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.027899350970983505, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.022631073370575905, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.021213335916399956, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.016482627019286156, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012027738615870476, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.011315084993839264, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.010975985787808895, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.010114384815096855, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008681645616889, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.008580462075769901, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.0067130704410374165, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0058437916450202465, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.012027738615870476, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.0058437916450202465, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.k_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.09142409265041351, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.0819706991314888, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.0706465020775795, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.06379827857017517, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.04132058843970299, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.03372456878423691, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.06252337247133255, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.05569617822766304, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.04310231655836105, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.036645904183387756, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.03631337359547615, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.0317235067486763, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.02650098130106926, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.020094547420740128, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.018280373886227608, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.015854446217417717, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01071442011743784, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.00978843029588461, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.009815109893679619, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.008711510337889194, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.008328409865498543, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.00798053853213787, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.005920178256928921, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005280663724988699, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.01071442011743784, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.005280663724988699, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.v_proj", "numel": 4194304, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.18115234375, "total_bits": 9148416.0, "err": 0.23222671449184418, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37646484375, "total_bits": 9967616.0, "err": 0.20233812928199768, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62646484375, "total_bits": 11016192.0, "err": 0.18566371500492096, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.73583984375, "total_bits": 11474944.0, "err": 0.16642232239246368, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22802734375, "total_bits": 13539328.0, "err": 0.10480187088251114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.73583984375, "total_bits": 15669248.0, "err": 0.09032250195741653, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0316162109375, "total_bits": 12715520.0, "err": 0.13796836137771606, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12646484375, "total_bits": 13113344.0, "err": 0.12379170209169388, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.18115234375, "total_bits": 13342720.0, "err": 0.1101965382695198, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.53271484375, "total_bits": 14817280.0, "err": 0.08996598422527313, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.672607421875, "total_bits": 15404032.0, "err": 0.08720248192548752, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0316162109375, "total_bits": 16909824.0, "err": 0.07102596014738083, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12646484375, "total_bits": 17307648.0, "err": 0.05918280780315399, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22802734375, "total_bits": 17733632.0, "err": 0.050455037504434586, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32958984375, "total_bits": 18159616.0, "err": 0.048178307712078094, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0316162109375, "total_bits": 21104128.0, "err": 0.035619162023067474, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22802734375, "total_bits": 21927936.0, "err": 0.02611183188855648, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.34521484375, "total_bits": 22419456.0, "err": 0.025005828589200974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.53271484375, "total_bits": 23205888.0, "err": 0.02313431166112423, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.73583984375, "total_bits": 24057856.0, "err": 0.021653717383742332, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0316162109375, "total_bits": 25298432.0, "err": 0.018375156447291374, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12646484375, "total_bits": 25696256.0, "err": 0.01729881949722767, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2816162109375, "total_bits": 26347008.0, "err": 0.014496580697596073, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010891279205679893, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010891279205679893, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12646484375, "total_bits": 34084864.0, "err": 0.010891279205679893, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.o_proj", "numel": 16777216, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1800537109375, "total_bits": 36575232.0, "err": 0.10442282259464264, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 39852032.0, "err": 0.09127435833215714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 44046336.0, "err": 0.08736041188240051, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7347412109375, "total_bits": 45881344.0, "err": 0.07494837045669556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2269287109375, "total_bits": 54138880.0, "err": 0.04649215564131737, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7347412109375, "total_bits": 62658560.0, "err": 0.04194711893796921, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 50857472.0, "err": 0.05440962687134743, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 52434944.0, "err": 0.04871177300810814, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1800537109375, "total_bits": 53352448.0, "err": 0.0475609190762043, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5316162109375, "total_bits": 59250688.0, "err": 0.037710800766944885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.67205810546875, "total_bits": 61606912.0, "err": 0.035256803035736084, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 67634688.0, "err": 0.027451537549495697, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 69212160.0, "err": 0.024778762832283974, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.2269287109375, "total_bits": 70916096.0, "err": 0.023852983489632607, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3284912109375, "total_bits": 72620032.0, "err": 0.02362506277859211, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 84411904.0, "err": 0.01442504208534956, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.2269287109375, "total_bits": 87693312.0, "err": 0.014510737732052803, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3441162109375, "total_bits": 89659392.0, "err": 0.014429842121899128, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013128750026226044, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.7347412109375, "total_bits": 96212992.0, "err": 0.013013679534196854, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 101189120.0, "err": 0.008842715062201023, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 102766592.0, "err": 0.01143114548176527, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.008496023714542389, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 136321024.0, "err": 0.01018300000578165, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.5316162109375, "total_bits": 92805120.0, "err": 0.013128750026226044, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281341552734375, "total_bits": 105383424.0, "err": 0.008496023714542389, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.gate_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.1338454633951187, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.12526249885559082, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.1220007911324501, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.10997921973466873, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.06304366886615753, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.05957658588886261, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.07136520743370056, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.06573767215013504, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.06397748738527298, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.056293219327926636, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.05323885381221771, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.03640460595488548, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.031511105597019196, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.030292998999357224, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.029999561607837677, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.01820751093327999, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.01567958854138851, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.015539240092039108, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.014321108348667622, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.014133499003946781, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.009590737521648407, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.009686766192317009, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.009105755016207695, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.006556048523634672, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.014133499003946781, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.006556048523634672, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.up_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1797921316964284, "total_bits": 127997951.99999999, "err": 0.09196840226650238, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751046316964284, "total_bits": 139466752.0, "err": 0.08579234033823013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251046316964284, "total_bits": 154146816.0, "err": 0.08353814482688904, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7344796316964284, "total_bits": 160569344.0, "err": 0.07518865913152695, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2266671316964284, "total_bits": 189470720.0, "err": 0.04324428364634514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7344796316964284, "total_bits": 219289600.0, "err": 0.04080909118056297, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031276157924107, "total_bits": 177997312.0, "err": 0.04906615987420082, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251046316964284, "total_bits": 183506944.0, "err": 0.045124176889657974, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1797921316964284, "total_bits": 186718208.0, "err": 0.043926313519477844, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5313546316964284, "total_bits": 207362048.0, "err": 0.03854922205209732, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6719273158482144, "total_bits": 215616512.0, "err": 0.03648326173424721, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031276157924107, "total_bits": 236717567.99999997, "err": 0.025101279839873314, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125104631696429, "total_bits": 242227200.0, "err": 0.02169780805706978, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.226667131696429, "total_bits": 248190976.0, "err": 0.020855387672781944, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.328229631696429, "total_bits": 254154752.0, "err": 0.020658105611801147, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.012602685950696468, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.226667131696429, "total_bits": 306911232.0, "err": 0.010969596914947033, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.343854631696429, "total_bits": 313792512.0, "err": 0.01087441761046648, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.531354631696429, "total_bits": 324802560.0, "err": 0.010049819014966488, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.734479631696429, "total_bits": 336730112.0, "err": 0.009924669750034809, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031276157924107, "total_bits": 354158080.0, "err": 0.006837327498942614, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125104631696429, "total_bits": 359667712.0, "err": 0.007022833917289972, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.281276157924107, "total_bits": 368838144.0, "err": 0.006516158115118742, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00504687987267971, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031276157924107, "total_bits": 295437824.0, "err": 0.012602685950696468, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.125104631696429, "total_bits": 477108224.0, "err": 0.00504687987267971, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.down_proj", "numel": 58720256, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1767054966517856, "total_bits": 127816704.0, "err": 0.09181137382984161, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3753662109375, "total_bits": 139482112.0, "err": 0.08160633593797684, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6253662109375, "total_bits": 154162176.0, "err": 0.07632366567850113, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7280447823660716, "total_bits": 160191488.0, "err": 0.06610209494829178, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2258126395089284, "total_bits": 189420544.0, "err": 0.042452048510313034, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7280447823660716, "total_bits": 218911744.0, "err": 0.03782760724425316, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031341552734375, "total_bits": 178001152.0, "err": 0.05374910309910774, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1253662109375, "total_bits": 183522304.0, "err": 0.04779037460684776, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1767054966517856, "total_bits": 186536960.0, "err": 0.043680280447006226, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5271519252232144, "total_bits": 207115264.0, "err": 0.035715240985155106, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6653616768973216, "total_bits": 215230976.0, "err": 0.03400997072458267, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031341552734375, "total_bits": 236721408.0, "err": 0.027637548744678497, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.1253662109375, "total_bits": 242242560.0, "err": 0.023407770320773125, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225812639508929, "total_bits": 248140800.0, "err": 0.021054396405816078, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.326259068080357, "total_bits": 254039039.99999997, "err": 0.0204454492777586, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031341552734375, "total_bits": 295441664.0, "err": 0.014275338500738144, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.011973254382610321, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.330723353794643, "total_bits": 313021440.0, "err": 0.011718731373548508, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.527151925223214, "total_bits": 324555776.0, "err": 0.0108017073944211, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.728044782366071, "total_bits": 336352256.0, "err": 0.010456297546625137, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031341552734375, "total_bits": 354161920.0, "err": 0.008383217267692089, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.1253662109375, "total_bits": 359683072.0, "err": 0.008921041153371334, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.245627267020089, "total_bits": 366744832.0, "err": 0.007624547462910414, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.007287913467735052, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225812639508929, "total_bits": 306861056.0, "err": 0.011973254382610321, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.1253662109375, "total_bits": 477123584.0, "err": 0.007287913467735052, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } } ], "last_module_idx": 66, "base_perplexity": 2.781671559376234 }