# Maverick-7B
This model is a merge of the following models:
## 🏆 Evaluation
### TruthfulQA

| Task          | Version | Metric | Value  |    | Stderr |
|---------------|--------:|--------|-------:|----|-------:|
| truthfulqa_mc |       1 | mc1    | 0.5165 | ±  | 0.0175 |
|               |         | mc2    | 0.6661 | ±  | 0.0152 |
### GPT4ALL

| Task          | Version | Metric   | Value  |    | Stderr |
|---------------|--------:|----------|-------:|----|-------:|
| arc_challenge |       0 | acc      | 0.6442 | ±  | 0.0140 |
|               |         | acc_norm | 0.6570 | ±  | 0.0139 |
| arc_easy      |       0 | acc      | 0.8645 | ±  | 0.0070 |
|               |         | acc_norm | 0.8304 | ±  | 0.0077 |
| boolq         |       1 | acc      | 0.8850 | ±  | 0.0056 |
| hellaswag     |       0 | acc      | 0.6813 | ±  | 0.0047 |
|               |         | acc_norm | 0.8571 | ±  | 0.0035 |
| openbookqa    |       0 | acc      | 0.3640 | ±  | 0.0215 |
|               |         | acc_norm | 0.4800 | ±  | 0.0224 |
| piqa          |       0 | acc      | 0.8324 | ±  | 0.0087 |
|               |         | acc_norm | 0.8460 | ±  | 0.0084 |
| winogrande    |       0 | acc      | 0.7869 | ±  | 0.0115 |
### AGIEval

| Task                           | Version | Metric   | Value  |    | Stderr |
|--------------------------------|--------:|----------|-------:|----|-------:|
| agieval_aqua_rat               |       0 | acc      | 0.2717 | ±  | 0.0280 |
|                                |         | acc_norm | 0.2559 | ±  | 0.0274 |
| agieval_logiqa_en              |       0 | acc      | 0.3902 | ±  | 0.0191 |
|                                |         | acc_norm | 0.3856 | ±  | 0.0191 |
| agieval_lsat_ar                |       0 | acc      | 0.2565 | ±  | 0.0289 |
|                                |         | acc_norm | 0.2478 | ±  | 0.0285 |
| agieval_lsat_lr                |       0 | acc      | 0.5118 | ±  | 0.0222 |
|                                |         | acc_norm | 0.5216 | ±  | 0.0221 |
| agieval_lsat_rc                |       0 | acc      | 0.6543 | ±  | 0.0291 |
|                                |         | acc_norm | 0.6506 | ±  | 0.0291 |
| agieval_sat_en                 |       0 | acc      | 0.7961 | ±  | 0.0281 |
|                                |         | acc_norm | 0.8010 | ±  | 0.0279 |
| agieval_sat_en_without_passage |       0 | acc      | 0.4660 | ±  | 0.0348 |
|                                |         | acc_norm | 0.4709 | ±  | 0.0349 |
| agieval_sat_math               |       0 | acc      | 0.3227 | ±  | 0.0316 |
|                                |         | acc_norm | 0.3045 | ±  | 0.0311 |
### Bigbench

| Task                                              | Version | Metric                | Value  |    | Stderr |
|---------------------------------------------------|--------:|-----------------------|-------:|----|-------:|
| bigbench_causal_judgement                         |       0 | multiple_choice_grade | 0.5684 | ±  | 0.0360 |
| bigbench_date_understanding                       |       0 | multiple_choice_grade | 0.6612 | ±  | 0.0247 |
| bigbench_disambiguation_qa                        |       0 | multiple_choice_grade | 0.4380 | ±  | 0.0309 |
| bigbench_geometric_shapes                         |       0 | multiple_choice_grade | 0.2173 | ±  | 0.0218 |
|                                                   |         | exact_str_match       | 0.0000 | ±  | 0.0000 |
| bigbench_logical_deduction_five_objects           |       0 | multiple_choice_grade | 0.3320 | ±  | 0.0211 |
| bigbench_logical_deduction_seven_objects          |       0 | multiple_choice_grade | 0.2243 | ±  | 0.0158 |
| bigbench_logical_deduction_three_objects          |       0 | multiple_choice_grade | 0.5667 | ±  | 0.0287 |
| bigbench_movie_recommendation                     |       0 | multiple_choice_grade | 0.4260 | ±  | 0.0221 |
| bigbench_navigate                                 |       0 | multiple_choice_grade | 0.5310 | ±  | 0.0158 |
| bigbench_reasoning_about_colored_objects          |       0 | multiple_choice_grade | 0.7230 | ±  | 0.0100 |
| bigbench_ruin_names                               |       0 | multiple_choice_grade | 0.5379 | ±  | 0.0236 |
| bigbench_salient_translation_error_detection      |       0 | multiple_choice_grade | 0.2956 | ±  | 0.0145 |
| bigbench_snarks                                   |       0 | multiple_choice_grade | 0.6961 | ±  | 0.0343 |
| bigbench_sports_understanding                     |       0 | multiple_choice_grade | 0.7424 | ±  | 0.0139 |
| bigbench_temporal_sequences                       |       0 | multiple_choice_grade | 0.4690 | ±  | 0.0158 |
| bigbench_tracking_shuffled_objects_five_objects   |       0 | multiple_choice_grade | 0.2304 | ±  | 0.0119 |
| bigbench_tracking_shuffled_objects_seven_objects  |       0 | multiple_choice_grade | 0.1880 | ±  | 0.0093 |
| bigbench_tracking_shuffled_objects_three_objects  |       0 | multiple_choice_grade | 0.5667 | ±  | 0.0287 |
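The tables above follow the output format of EleutherAI's lm-evaluation-harness. The snippet below is a minimal reproduction sketch, not the exact command used for this card: the repository id `your-org/Maverick-7B` is a placeholder, a recent v0.4.x harness is assumed, and some task names differ across harness versions (for example, newer releases split `truthfulqa_mc` into `truthfulqa_mc1` and `truthfulqa_mc2`).

```python
# Hedged reproduction sketch, assuming lm-evaluation-harness v0.4.x.
# Repo id, dtype, batch size, and task list are illustrative assumptions;
# they are not guaranteed to match the run that produced the tables above.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    # Placeholder repo id, not the actual location of Maverick-7B.
    model_args="pretrained=your-org/Maverick-7B,dtype=bfloat16",
    tasks=[
        "arc_challenge", "arc_easy", "boolq", "hellaswag",
        "openbookqa", "piqa", "winogrande",
    ],
    num_fewshot=0,
    batch_size=8,
)

# Per-task metrics (acc, acc_norm, and their stderr values, as in the
# tables above) are returned under results["results"].
for task, metrics in results["results"].items():
    print(task, metrics)
```

The AGIEval and Bigbench groups can be evaluated the same way by swapping in their task names, provided the installed harness version exposes them.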