{ "measurement": { "model.layers.0.self_attn": [ { "accuracy": 0.9067882895469666, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9177175760269165, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9214825630187988, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9550692439079285, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9559110403060913, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9565410614013672, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9701775312423706, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9714545607566833, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9744811654090881, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.97586590051651, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9776427149772644, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9789475202560425, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9794699549674988, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9810879230499268, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9882597327232361, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9884263873100281, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9907709956169128, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9907500147819519, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9941045045852661, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.0.block_sparse_moe": [ { "accuracy": 0.9162482619285583, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9216577410697937, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9322911500930786, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9352685213088989, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9647007584571838, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9679586291313171, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9725298881530762, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9823943972587585, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9838417172431946, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9826022386550903, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9846062064170837, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9912301898002625, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9915924072265625, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948724508285522, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9951278567314148, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9958245158195496, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9971694350242615, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.1.self_attn": [ { "accuracy": 0.8992047905921936, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9071924686431885, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9170793294906616, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9458836317062378, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9487712979316711, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9520354270935059, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.961240828037262, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9658911228179932, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9702070355415344, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9715430736541748, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9748721122741699, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9771724343299866, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.977027177810669, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9793418049812317, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9869425296783447, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.987941563129425, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9893659949302673, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9908860325813293, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9936305284500122, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.1.block_sparse_moe": [ { "accuracy": 0.9415642023086548, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9413121342658997, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9409374594688416, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9409515857696533, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9911555051803589, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9893935322761536, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.989450991153717, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9982009530067444, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.996887743473053, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9973922371864319, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9976260662078857, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.998816967010498, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9983145594596863, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9990900158882141, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.999210774898529, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9992322325706482, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992187023162842, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.2.self_attn": [ { "accuracy": 0.990545928478241, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9910634160041809, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9916816353797913, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9925942420959473, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959110021591187, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.996087908744812, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9966884851455688, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9969009757041931, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9971958994865417, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973528981208801, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973701238632202, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9974915385246277, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975029826164246, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976794123649597, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9983696937561035, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9983503818511963, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9984636306762695, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984675049781799, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983234405517578, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.2.block_sparse_moe": [ { "accuracy": 0.9910680055618286, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9913401007652283, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9923831224441528, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9926673769950867, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9957854747772217, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.996101975440979, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964727759361267, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9978584051132202, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.998046875, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.997869610786438, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9981337785720825, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9989245533943176, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9990037679672241, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9993788599967957, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9994075298309326, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9994639754295349, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9996680617332458, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.3.self_attn": [ { "accuracy": 0.9795281887054443, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9798194169998169, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9798218607902527, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9810792803764343, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9909771084785461, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9913586378097534, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9918456077575684, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9923208951950073, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9946117401123047, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.994682788848877, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9959851503372192, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9967591166496277, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9961630702018738, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9969954490661621, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9977127313613892, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9982021450996399, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.997846245765686, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983680844306946, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984323978424072, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.3.block_sparse_moe": [ { "accuracy": 0.9869529604911804, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9873523712158203, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9888346195220947, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9892321825027466, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9937496185302734, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.99425208568573, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947552680969238, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9968162178993225, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9970951676368713, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9968389868736267, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9972466230392456, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9984020590782166, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9985313415527344, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9990715980529785, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9991146922111511, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9991897940635681, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999496340751648, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.4.self_attn": [ { "accuracy": 0.9761132597923279, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9764231443405151, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9781395792961121, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9792785048484802, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9864591360092163, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9864041209220886, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9879760146141052, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9877216815948486, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9907268285751343, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9914229512214661, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.994326114654541, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945130348205566, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947597980499268, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949051737785339, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949073791503906, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9957069158554077, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949800968170166, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9958264231681824, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9960317611694336, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.4.block_sparse_moe": [ { "accuracy": 0.9808738231658936, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9815309643745422, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9838190674781799, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9844250679016113, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9908621311187744, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9915975332260132, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9923686981201172, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9953303933143616, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9957383275032043, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9953675866127014, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959627389907837, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9976578950881958, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9978504776954651, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9986363649368286, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9987004995346069, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9988178014755249, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992610216140747, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.5.self_attn": [ { "accuracy": 0.9719634652137756, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9727820158004761, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9742852449417114, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9775050282478333, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9848189353942871, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9853628277778625, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9870995283126831, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9876836538314819, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9890148043632507, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9891777038574219, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9917204976081848, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9923698306083679, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922268390655518, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.993150532245636, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.995287299156189, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958469867706299, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955398440361023, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963408708572388, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9965154528617859, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.5.block_sparse_moe": [ { "accuracy": 0.9737633466720581, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9746120572090149, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9778417348861694, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9786947965621948, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9874497652053833, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9884379506111145, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9895125031471252, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9935845136642456, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9941443800926208, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9936391115188599, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9944421648979187, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967767596244812, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9970482587814331, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9981220364570618, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9982091188430786, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9983731508255005, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989771842956543, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.6.self_attn": [ { "accuracy": 0.9731457233428955, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9744046330451965, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.977060854434967, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.98115473985672, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9806730151176453, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.981418788433075, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9830838441848755, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9841303825378418, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9854456782341003, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9855290651321411, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9912959337234497, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922337532043457, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9919139742851257, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9928252100944519, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950799345970154, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951396584510803, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9954676628112793, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9958063960075378, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963059425354004, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.6.block_sparse_moe": [ { "accuracy": 0.9686686992645264, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9696639776229858, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9737097024917603, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9747704267501831, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9850161671638489, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9861977100372314, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.987537145614624, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9923300743103027, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9930151104927063, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9924031496047974, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9933714270591736, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9961642622947693, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964728951454163, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997776985168457, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9978819489479065, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9980896711349487, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9988110661506653, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.7.self_attn": [ { "accuracy": 0.9584060907363892, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9601303935050964, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9616978764533997, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9667524695396423, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9808213114738464, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9809763431549072, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9851066470146179, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9850733280181885, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.985920250415802, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9860385060310364, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9897075891494751, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9902828931808472, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.990752637386322, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9909821152687073, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9943612217903137, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945234060287476, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950018525123596, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9950023293495178, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9955850839614868, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.7.block_sparse_moe": [ { "accuracy": 0.9641479253768921, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9652960896492004, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9697108268737793, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9708831310272217, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9827756881713867, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9841769337654114, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9856365323066711, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9911853075027466, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9919795989990234, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9912444353103638, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9923794865608215, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9955689311027527, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959052801132202, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9974048733711243, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9975178241729736, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977393746376038, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985628724098206, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.8.self_attn": [ { "accuracy": 0.9546017646789551, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9565483927726746, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9586065411567688, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9635715484619141, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9776809811592102, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.978465735912323, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9815812110900879, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9824506044387817, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9842130541801453, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9849230647087097, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.985784649848938, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9852104783058167, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9856173992156982, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9859763383865356, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9904539585113525, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.989751935005188, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.990863025188446, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9901577234268188, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9918235540390015, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.8.block_sparse_moe": [ { "accuracy": 0.957284152507782, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9586207866668701, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9639620184898376, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9654110074043274, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9793422222137451, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.981013834476471, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9828526377677917, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9893752336502075, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9903407692909241, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9894829988479614, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.990835428237915, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946606159210205, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950413107872009, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968433976173401, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9969893097877502, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997268557548523, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998203694820404, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.9.self_attn": [ { "accuracy": 0.9565317034721375, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9577098488807678, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9604106545448303, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9666262269020081, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9787371158599854, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9793527722358704, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.984663188457489, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9852400422096252, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9870606660842896, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9884949922561646, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.989353597164154, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9898682832717896, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9903037548065186, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9908866882324219, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945310950279236, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947492480278015, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955671429634094, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9957377910614014, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.99726802110672, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.9.block_sparse_moe": [ { "accuracy": 0.9562216401100159, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9577085971832275, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9632545709609985, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9647526741027832, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9788667559623718, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9805951118469238, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9825092554092407, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9891012907028198, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9900810718536377, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9892241954803467, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9906117916107178, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9945322871208191, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948896765708923, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967488646507263, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9969066381454468, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997194230556488, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981440305709839, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.10.self_attn": [ { "accuracy": 0.9527701735496521, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9541651606559753, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9571799635887146, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9636037349700928, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.975989818572998, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9761081337928772, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9810283184051514, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9812305569648743, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9813282489776611, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9824535250663757, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9878506064414978, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9882125854492188, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9885738492012024, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9890905022621155, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9928545951843262, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929975271224976, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9937034845352173, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9939423203468323, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9959254264831543, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.10.block_sparse_moe": [ { "accuracy": 0.9494475722312927, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9511694312095642, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9577569365501404, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9595742225646973, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.975491464138031, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9775068759918213, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9797982573509216, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9872618913650513, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9884714484214783, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.987509548664093, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9891214370727539, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9936516880989075, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9941046237945557, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9962165355682373, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9964039921760559, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.996755063533783, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978275299072266, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.11.self_attn": [ { "accuracy": 0.9467778205871582, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9488859176635742, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9505236148834229, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9579731822013855, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9710554480552673, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9716861844062805, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.977662980556488, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9782785177230835, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9810504913330078, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.98219233751297, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9863719940185547, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9869796633720398, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9875050187110901, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.988709032535553, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9907366037368774, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9908692240715027, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.99156653881073, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9919276237487793, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933503866195679, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.11.block_sparse_moe": [ { "accuracy": 0.9465283155441284, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9484012126922607, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9554300904273987, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9573783278465271, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9739655256271362, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9761450290679932, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.978627622127533, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9863911867141724, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9877172112464905, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9867131114006042, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9884383678436279, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9932346343994141, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9937188625335693, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959430694580078, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9961529970169067, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9965354204177856, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9976581335067749, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.12.self_attn": [ { "accuracy": 0.9402430653572083, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.942696213722229, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9453091025352478, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.953092098236084, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9673134684562683, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9683114886283875, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9746505618095398, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9756824374198914, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9735302329063416, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9766461253166199, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9814391136169434, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9827558994293213, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9829526543617249, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9838970899581909, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9879995584487915, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9877544045448303, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9888996481895447, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.988737940788269, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9891447424888611, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.12.block_sparse_moe": [ { "accuracy": 0.9443697333335876, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9464194774627686, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9535381197929382, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.955521285533905, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9728528261184692, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9751807451248169, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9777011871337891, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9857931137084961, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9871690273284912, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9860959053039551, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9879468679428101, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9929125308990479, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9933907389640808, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9957089424133301, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9959554076194763, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9963308572769165, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9974982142448425, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.13.self_attn": [ { "accuracy": 0.9386351108551025, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9412864446640015, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9453186988830566, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9562393426895142, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9698872566223145, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9705983400344849, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9803400039672852, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9814019799232483, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9835389256477356, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9843765497207642, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9851095080375671, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9860767126083374, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9869024753570557, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9879212975502014, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9918459057807922, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9924142360687256, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9935688376426697, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9939531683921814, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9956839084625244, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.13.block_sparse_moe": [ { "accuracy": 0.9364676475524902, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9387849569320679, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9468547105789185, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.949124276638031, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9689660668373108, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.971605658531189, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9744648337364197, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9838013648986816, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9853904843330383, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.984165608882904, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9862714409828186, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9919817447662354, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9925865530967712, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9952443838119507, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9955007433891296, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959427714347839, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973514080047607, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.14.self_attn": [ { "accuracy": 0.9309174418449402, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.933725118637085, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9380926489830017, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9503087997436523, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9650832414627075, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9661017060279846, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.976899266242981, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9783095717430115, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9797156453132629, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9810853600502014, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9824468493461609, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9836163520812988, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9844991564750671, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9857290983200073, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9910500049591064, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9917761087417603, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9931749701499939, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9936012029647827, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.995346188545227, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.14.block_sparse_moe": [ { "accuracy": 0.9362280368804932, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9384938478469849, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9464635252952576, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9487565755844116, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9687959551811218, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9714127779006958, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9742894768714905, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9835919737815857, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9852063059806824, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9840643405914307, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9861527681350708, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9918864965438843, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9924890995025635, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951303005218506, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9954130053520203, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9958568215370178, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9972238540649414, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.15.self_attn": [ { "accuracy": 0.926139771938324, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9282694458961487, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9341769218444824, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9451899528503418, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9629027247428894, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.963938295841217, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9739084243774414, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.975311279296875, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9778823256492615, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9791116714477539, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9814271926879883, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9828386306762695, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.983410120010376, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9849848747253418, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9903965592384338, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9911839962005615, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922545552253723, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9929354786872864, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9960562586784363, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.15.block_sparse_moe": [ { "accuracy": 0.9366453886032104, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9388306140899658, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9461753964424133, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9483304023742676, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.968975305557251, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9714910984039307, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.974142849445343, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9837688207626343, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9853091239929199, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9841887950897217, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9862262010574341, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9919819235801697, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9925937652587891, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995257556438446, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.995539665222168, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959569573402405, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.997409462928772, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.16.self_attn": [ { "accuracy": 0.9307618737220764, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9329615235328674, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9384997487068176, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9488650560379028, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.96356600522995, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9641638398170471, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.973844587802887, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9744487404823303, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9747790694236755, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9754589200019836, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9812324643135071, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9827317595481873, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9833462238311768, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9844845533370972, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9898632764816284, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9903991222381592, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914281368255615, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9916501641273499, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9942371249198914, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.16.block_sparse_moe": [ { "accuracy": 0.9395449161529541, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9416027665138245, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9483568668365479, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.950420618057251, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9703508019447327, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9728196263313293, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9752951264381409, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9844405651092529, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9859333634376526, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9848726391792297, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.98685222864151, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9923068881034851, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9928939938545227, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953963756561279, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9956991672515869, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960857629776001, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9974470734596252, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.17.self_attn": [ { "accuracy": 0.9400796890258789, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9424660205841064, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9464419484138489, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9576985836029053, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9692317843437195, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.970343291759491, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9787785410881042, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9802383780479431, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.980556845664978, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9806811809539795, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9841726422309875, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9853521585464478, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9858353734016418, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9870466589927673, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.990352988243103, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9906070828437805, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9917676448822021, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9919067025184631, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9939866065979004, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.17.block_sparse_moe": [ { "accuracy": 0.9337330460548401, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9359050393104553, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9434888958930969, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9458630084991455, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9674187898635864, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9701007008552551, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9729616641998291, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9828730821609497, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9845372438430786, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9834111332893372, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9855387210845947, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9915560483932495, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9922093749046326, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9949573874473572, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9952790141105652, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9957297444343567, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.997194766998291, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.18.self_attn": [ { "accuracy": 0.9404236078262329, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9429998993873596, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9471920728683472, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9559651613235474, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9700754880905151, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9707397818565369, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9790695309638977, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9799709320068359, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9815611243247986, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9823623299598694, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9845070242881775, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9854212999343872, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.986069917678833, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9871479868888855, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9915421605110168, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.992544949054718, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929100871086121, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9940165281295776, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9953362941741943, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.18.block_sparse_moe": [ { "accuracy": 0.9359573125839233, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9380837082862854, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9450436234474182, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9473040699958801, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9684911966323853, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9710261821746826, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9737166166305542, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9833680391311646, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9849883913993835, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9839664697647095, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9860057830810547, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9918556809425354, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9924961924552917, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995165228843689, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9954947233200073, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959261417388916, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973859786987305, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.19.self_attn": [ { "accuracy": 0.9455645084381104, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9476815462112427, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9516910910606384, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9603496193885803, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9725568294525146, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9728826880455017, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9812980890274048, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9817120432853699, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.981988251209259, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9819945693016052, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9861330389976501, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9871248602867126, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9877132177352905, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9888567328453064, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929689168930054, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9935982823371887, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9944139122962952, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9949852824211121, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963383674621582, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.19.block_sparse_moe": [ { "accuracy": 0.9391094446182251, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9411165118217468, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9471585154533386, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9492319822311401, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9699552059173584, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9723553657531738, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9747426509857178, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9840641617774963, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9856025576591492, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9846768975257874, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9866021275520325, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9921832084655762, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9927480816841125, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9952936768531799, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9956203103065491, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959983229637146, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973669648170471, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.20.self_attn": [ { "accuracy": 0.9538553357124329, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9558570384979248, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9589889645576477, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9666476845741272, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9765026569366455, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9769631624221802, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9839024543762207, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9847570061683655, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9869215488433838, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9880277514457703, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9879828691482544, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9887779951095581, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9892855286598206, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9902099370956421, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929762482643127, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9934956431388855, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9940653443336487, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9944170713424683, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.994766891002655, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.20.block_sparse_moe": [ { "accuracy": 0.9457312226295471, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9474432468414307, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9521948099136353, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9538738131523132, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9731522798538208, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9752893447875977, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9771627187728882, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9858904480934143, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.987227201461792, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9863337278366089, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9880734086036682, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9930614233016968, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.993640661239624, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959012269973755, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9961798191070557, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964807033538818, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978252649307251, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.21.self_attn": [ { "accuracy": 0.9614972472190857, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9630907773971558, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9657652974128723, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9716720581054688, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9806828498840332, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9808754324913025, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9864546656608582, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9868253469467163, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.988134503364563, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9885315895080566, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9903974533081055, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9911051392555237, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914910197257996, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9923526644706726, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9942717552185059, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945540428161621, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951934218406677, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9954721927642822, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9968225359916687, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.21.block_sparse_moe": [ { "accuracy": 0.955048680305481, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9565030932426453, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.960193932056427, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9614853858947754, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9778380990028381, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.979629397392273, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9810252785682678, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9883707761764526, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.989475667476654, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9886927008628845, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9901518821716309, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9942411184310913, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947350025177002, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9965632557868958, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9967820644378662, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9969993829727173, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9980944991111755, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.22.self_attn": [ { "accuracy": 0.9665862321853638, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9685580134391785, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9706789255142212, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9764211177825928, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.983084499835968, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9835995435714722, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9879946112632751, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9886513948440552, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9902456402778625, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9908341765403748, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9916015863418579, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922620058059692, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.992514431476593, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9931382536888123, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953325390815735, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958641529083252, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9961457252502441, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9967287182807922, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973146915435791, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.22.block_sparse_moe": [ { "accuracy": 0.9581499099731445, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9595004916191101, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9627856016159058, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.963940441608429, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.979333221912384, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9809849858283997, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9822102785110474, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9891775846481323, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.990182101726532, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9894809126853943, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.990828812122345, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946661591529846, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951383471488953, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968669414520264, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9970632195472717, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997258722782135, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983413219451904, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.23.self_attn": [ { "accuracy": 0.9666981101036072, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9685927033424377, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9714146852493286, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9767617583274841, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.982944905757904, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9834430813789368, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9876484870910645, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9881972074508667, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9889382123947144, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.989739179611206, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9918262958526611, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922913908958435, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9926827549934387, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9932340979576111, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955021739006042, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959923028945923, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9962418079376221, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9967868328094482, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.997798502445221, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.23.block_sparse_moe": [ { "accuracy": 0.9589799046516418, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9602742195129395, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.963300347328186, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9644175171852112, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9797069430351257, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9813179969787598, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9824610352516174, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9893194437026978, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9903148412704468, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9896608591079712, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9909744262695312, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947419166564941, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951983094215393, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968808889389038, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9970874786376953, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997269332408905, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983143210411072, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.24.self_attn": [ { "accuracy": 0.9673762321472168, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9679964780807495, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9706433415412903, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9752945899963379, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9823808073997498, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9822703003883362, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9867497682571411, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9869536757469177, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9892389178276062, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9897233247756958, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9914225339889526, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.992520809173584, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9930751323699951, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936210513114929, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9946701526641846, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9962741136550903, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.995280385017395, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9971272349357605, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966344237327576, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.24.block_sparse_moe": [ { "accuracy": 0.9598531723022461, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9611683487892151, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9639076590538025, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9649520516395569, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9801657795906067, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9817363619804382, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9827626943588257, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9895222783088684, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9904941320419312, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9898799061775208, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9911530017852783, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.994818389415741, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9952555894851685, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968741536140442, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9970959424972534, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9972575306892395, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9982579350471497, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.25.self_attn": [ { "accuracy": 0.9688186645507812, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9701895117759705, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9728450179100037, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9783141016960144, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9845249056816101, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9847149848937988, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9897538423538208, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9899092316627502, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9900721311569214, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9911779761314392, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9918477535247803, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9925499558448792, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929670691490173, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9935824275016785, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959262609481812, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9962867498397827, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9967677593231201, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9971021413803101, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9977668523788452, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.25.block_sparse_moe": [ { "accuracy": 0.9597939848899841, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9611161947250366, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9638566374778748, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9649006128311157, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9800922274589539, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.981726348400116, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9827641844749451, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9894959926605225, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9904935359954834, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9898434281349182, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9911564588546753, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948053359985352, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995258092880249, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968696236610413, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9970847368240356, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9972468614578247, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9982454776763916, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.26.self_attn": [ { "accuracy": 0.9686658978462219, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9700933694839478, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9724171161651611, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9779847264289856, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9841612577438354, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9844681024551392, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9892829656600952, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9897152185440063, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.990149736404419, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9917463064193726, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9920781850814819, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9926520586013794, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9930605888366699, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9937618374824524, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9957879781723022, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9964690804481506, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9965918660163879, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973456859588623, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979214072227478, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.26.block_sparse_moe": [ { "accuracy": 0.9608571529388428, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9621493220329285, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9647561311721802, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9657557606697083, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9806537628173828, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9822230935096741, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9832121133804321, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9897847771644592, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9907569289207458, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9901394844055176, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9913977384567261, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.994959831237793, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953965544700623, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9969742298126221, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972097873687744, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997367799282074, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983646273612976, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.27.self_attn": [ { "accuracy": 0.9683869481086731, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9707068800926208, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9740902781486511, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9787266254425049, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.984859049320221, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9851580858230591, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9892011284828186, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9894832372665405, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9888684749603271, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9899089336395264, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9926565289497375, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9932090044021606, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9935433268547058, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9939624071121216, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9961981773376465, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9964185953140259, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9969852566719055, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9971997737884521, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9982542395591736, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.27.block_sparse_moe": [ { "accuracy": 0.957897961139679, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9593111276626587, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.962069034576416, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9631658792495728, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9791309833526611, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9808214902877808, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9818924069404602, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9888975620269775, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9899835586547852, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9893601536750793, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9907132983207703, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9945561289787292, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950129985809326, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967098236083984, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9969704151153564, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971427917480469, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981980323791504, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.28.self_attn": [ { "accuracy": 0.9653977751731873, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9668785929679871, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9706372022628784, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9769287705421448, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9828128218650818, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.98324054479599, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9878803491592407, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9886369705200195, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.989266574382782, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9897065162658691, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9909981489181519, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991666853427887, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9920990467071533, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9928867816925049, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951639175415039, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951352477073669, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958961606025696, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9959748983383179, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9969343543052673, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.28.block_sparse_moe": [ { "accuracy": 0.9544247984886169, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9560117125511169, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9588847160339355, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9601418972015381, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9773159027099609, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9791907668113708, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9803653955459595, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9877748489379883, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9889665246009827, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9883886575698853, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9898701310157776, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9940261244773865, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.994482696056366, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9962992072105408, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.996628999710083, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968178868293762, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978936314582825, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.29.self_attn": [ { "accuracy": 0.9608748555183411, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.962222158908844, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.967974841594696, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9749037027359009, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9797923564910889, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9803337454795837, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9864434003829956, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.987026572227478, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9874898791313171, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9878078699111938, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9901180863380432, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9907422065734863, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9916208386421204, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9921712279319763, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951450824737549, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956217408180237, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9961603879928589, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9967023134231567, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9980209469795227, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.29.block_sparse_moe": [ { "accuracy": 0.9551180005073547, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9567566514015198, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.959876537322998, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9611309766769409, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9777185320854187, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9795714020729065, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.980852484703064, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9880881309509277, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9892033338546753, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9886367917060852, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9900896549224854, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9941924810409546, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946589469909668, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964574575424194, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9967527389526367, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9969587326049805, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9980418682098389, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.30.self_attn": [ { "accuracy": 0.9662954807281494, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9679164290428162, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9716951251029968, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9786924719810486, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9829646348953247, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.983328104019165, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9889703989028931, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9894382953643799, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9898733496665955, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9908291101455688, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9915413856506348, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9921064376831055, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9928567409515381, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9933127760887146, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958487749099731, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959938526153564, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9967434406280518, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.99693363904953, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9982244968414307, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.30.block_sparse_moe": [ { "accuracy": 0.9542804956436157, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9557631015777588, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9588345885276794, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9600708484649658, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9772197008132935, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9791390895843506, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9803816080093384, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9876689314842224, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9889033436775208, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9883190989494324, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9898006319999695, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.993979811668396, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9943466186523438, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.996215283870697, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9965730905532837, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967727661132812, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9977964162826538, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.31.self_attn": [ { "accuracy": 0.9660325050354004, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9676452279090881, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9703148603439331, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9752614498138428, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9836623072624207, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9838367700576782, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9888634085655212, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9890074133872986, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9896683096885681, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9904163479804993, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9914776086807251, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9921423196792603, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.992666482925415, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9933781027793884, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959150552749634, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9960066676139832, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9967944622039795, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9967555999755859, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979788064956665, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.31.block_sparse_moe": [ { "accuracy": 0.9734880924224854, "total_bits": 3157926400, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9741837382316589, "total_bits": 3268026880, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9755711555480957, "total_bits": 3652411392, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9761722683906555, "total_bits": 4098056192, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9871899485588074, "total_bits": 4621411072, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9881865978240967, "total_bits": 4737212416, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9887644648551941, "total_bits": 5093868288, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9930471777915955, "total_bits": 5824164608, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9937756657600403, "total_bits": 5910044672, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9934390187263489, "total_bits": 6006579968, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9942041039466858, "total_bits": 6122381312, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9965391755104065, "total_bits": 7391748864, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967101812362671, "total_bits": 7507550208, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977518320083618, "total_bits": 8550425344, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9979667067527771, "total_bits": 8877312000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9980623126029968, "total_bits": 9674229760, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985933303833008, "total_bits": 11318396928, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.norm.norm": null, "lm_head.linear": null }, "last_module_idx": 66 }