|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.4005894355021535, |
|
"global_step": 1875, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1655, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001, |
|
"loss": 1.001, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0287, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1578, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2146, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.997, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9024, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9901, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1264, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2038, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8935, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9178, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9746, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1566, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2877, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9146, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8895, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0121, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.0215636491775513, |
|
"eval_runtime": 950.138, |
|
"eval_samples_per_second": 1.052, |
|
"eval_steps_per_second": 1.052, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"mmlu_eval_accuracy": 0.731892294851104, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182, |
|
"mmlu_eval_accuracy_anatomy": 0.7857142857142857, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 0.7272727272727273, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.896551724137931, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.7272727272727273, |
|
"mmlu_eval_accuracy_college_mathematics": 0.45454545454545453, |
|
"mmlu_eval_accuracy_college_medicine": 0.9090909090909091, |
|
"mmlu_eval_accuracy_college_physics": 0.6363636363636364, |
|
"mmlu_eval_accuracy_computer_security": 0.6363636363636364, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6538461538461539, |
|
"mmlu_eval_accuracy_econometrics": 0.8333333333333334, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.8125, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.7073170731707317, |
|
"mmlu_eval_accuracy_formal_logic": 0.5714285714285714, |
|
"mmlu_eval_accuracy_global_facts": 0.5, |
|
"mmlu_eval_accuracy_high_school_biology": 0.8125, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7441860465116279, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.3793103448275862, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9615384615384616, |
|
"mmlu_eval_accuracy_high_school_physics": 0.23529411764705882, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.9333333333333333, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.6956521739130435, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.8076923076923077, |
|
"mmlu_eval_accuracy_human_aging": 0.8260869565217391, |
|
"mmlu_eval_accuracy_human_sexuality": 0.6666666666666666, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.6363636363636364, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7777777777777778, |
|
"mmlu_eval_accuracy_machine_learning": 0.5454545454545454, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.88, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7906976744186046, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.59, |
|
"mmlu_eval_accuracy_nutrition": 0.7878787878787878, |
|
"mmlu_eval_accuracy_philosophy": 0.7941176470588235, |
|
"mmlu_eval_accuracy_prehistory": 0.8285714285714286, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6451612903225806, |
|
"mmlu_eval_accuracy_professional_law": 0.6294117647058823, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8064516129032258, |
|
"mmlu_eval_accuracy_professional_psychology": 0.8115942028985508, |
|
"mmlu_eval_accuracy_public_relations": 0.6666666666666666, |
|
"mmlu_eval_accuracy_security_studies": 0.8148148148148148, |
|
"mmlu_eval_accuracy_sociology": 0.9545454545454546, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.326305795171384, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1133, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2485, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9653, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9455, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0373, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1425, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3136, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8695, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001, |
|
"loss": 0.872, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0152, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1309, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001, |
|
"loss": 1.267, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9249, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9148, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9864, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2312, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2354, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9126, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9213, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.0163359642028809, |
|
"eval_runtime": 948.1151, |
|
"eval_samples_per_second": 1.055, |
|
"eval_steps_per_second": 1.055, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"mmlu_eval_accuracy": 0.7395476061435284, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727, |
|
"mmlu_eval_accuracy_anatomy": 0.7857142857142857, |
|
"mmlu_eval_accuracy_astronomy": 0.75, |
|
"mmlu_eval_accuracy_business_ethics": 0.7272727272727273, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.896551724137931, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.7272727272727273, |
|
"mmlu_eval_accuracy_college_mathematics": 0.36363636363636365, |
|
"mmlu_eval_accuracy_college_medicine": 0.9090909090909091, |
|
"mmlu_eval_accuracy_college_physics": 0.6363636363636364, |
|
"mmlu_eval_accuracy_computer_security": 0.6363636363636364, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.5769230769230769, |
|
"mmlu_eval_accuracy_econometrics": 0.8333333333333334, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.875, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.6829268292682927, |
|
"mmlu_eval_accuracy_formal_logic": 0.6428571428571429, |
|
"mmlu_eval_accuracy_global_facts": 0.5, |
|
"mmlu_eval_accuracy_high_school_biology": 0.8125, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.45454545454545453, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7674418604651163, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.41379310344827586, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9615384615384616, |
|
"mmlu_eval_accuracy_high_school_physics": 0.23529411764705882, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.95, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.7391304347826086, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.7692307692307693, |
|
"mmlu_eval_accuracy_human_aging": 0.8260869565217391, |
|
"mmlu_eval_accuracy_human_sexuality": 0.6666666666666666, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.6363636363636364, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7777777777777778, |
|
"mmlu_eval_accuracy_machine_learning": 0.5454545454545454, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.92, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7790697674418605, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.57, |
|
"mmlu_eval_accuracy_nutrition": 0.7272727272727273, |
|
"mmlu_eval_accuracy_philosophy": 0.7941176470588235, |
|
"mmlu_eval_accuracy_prehistory": 0.8571428571428571, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6774193548387096, |
|
"mmlu_eval_accuracy_professional_law": 0.6411764705882353, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8387096774193549, |
|
"mmlu_eval_accuracy_professional_psychology": 0.8115942028985508, |
|
"mmlu_eval_accuracy_public_relations": 0.6666666666666666, |
|
"mmlu_eval_accuracy_security_studies": 0.8148148148148148, |
|
"mmlu_eval_accuracy_sociology": 0.9545454545454546, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5, |
|
"mmlu_eval_accuracy_world_religions": 0.8947368421052632, |
|
"mmlu_loss": 1.2796503596061355, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9737, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001, |
|
"loss": 1.157, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2106, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8687, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8742, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9901, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2238, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2604, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8756, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8683, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9824, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1574, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2687, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8657, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9207, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001, |
|
"loss": 1.012, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1517, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1654, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8931, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.0150845050811768, |
|
"eval_runtime": 949.8392, |
|
"eval_samples_per_second": 1.053, |
|
"eval_steps_per_second": 1.053, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"mmlu_eval_accuracy": 0.7346397374699287, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727, |
|
"mmlu_eval_accuracy_anatomy": 0.7142857142857143, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 0.8181818181818182, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.8620689655172413, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.7272727272727273, |
|
"mmlu_eval_accuracy_college_mathematics": 0.36363636363636365, |
|
"mmlu_eval_accuracy_college_medicine": 0.9090909090909091, |
|
"mmlu_eval_accuracy_college_physics": 0.5454545454545454, |
|
"mmlu_eval_accuracy_computer_security": 0.6363636363636364, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154, |
|
"mmlu_eval_accuracy_econometrics": 0.8333333333333334, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.8125, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.6829268292682927, |
|
"mmlu_eval_accuracy_formal_logic": 0.6428571428571429, |
|
"mmlu_eval_accuracy_global_facts": 0.5, |
|
"mmlu_eval_accuracy_high_school_biology": 0.84375, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.45454545454545453, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7674418604651163, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.41379310344827586, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9230769230769231, |
|
"mmlu_eval_accuracy_high_school_physics": 0.23529411764705882, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.9666666666666667, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.7391304347826086, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.8076923076923077, |
|
"mmlu_eval_accuracy_human_aging": 0.8260869565217391, |
|
"mmlu_eval_accuracy_human_sexuality": 0.6666666666666666, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.6363636363636364, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7222222222222222, |
|
"mmlu_eval_accuracy_machine_learning": 0.6363636363636364, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.88, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7558139534883721, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.56, |
|
"mmlu_eval_accuracy_nutrition": 0.7575757575757576, |
|
"mmlu_eval_accuracy_philosophy": 0.7941176470588235, |
|
"mmlu_eval_accuracy_prehistory": 0.8571428571428571, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6774193548387096, |
|
"mmlu_eval_accuracy_professional_law": 0.6294117647058823, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8064516129032258, |
|
"mmlu_eval_accuracy_professional_psychology": 0.8260869565217391, |
|
"mmlu_eval_accuracy_public_relations": 0.6666666666666666, |
|
"mmlu_eval_accuracy_security_studies": 0.8148148148148148, |
|
"mmlu_eval_accuracy_sociology": 0.9090909090909091, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.186674291658526, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8507, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9164, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0908, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0431, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8567, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8818, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9499, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0437, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0487, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8405, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8818, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9619, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0753, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0218, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8763, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8789, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8631, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9846, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 1.0305067300796509, |
|
"eval_runtime": 948.7106, |
|
"eval_samples_per_second": 1.054, |
|
"eval_steps_per_second": 1.054, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"mmlu_eval_accuracy": 0.7324229372189777, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727, |
|
"mmlu_eval_accuracy_anatomy": 0.6428571428571429, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 1.0, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.896551724137931, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.6363636363636364, |
|
"mmlu_eval_accuracy_college_mathematics": 0.36363636363636365, |
|
"mmlu_eval_accuracy_college_medicine": 0.8636363636363636, |
|
"mmlu_eval_accuracy_college_physics": 0.6363636363636364, |
|
"mmlu_eval_accuracy_computer_security": 0.6363636363636364, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154, |
|
"mmlu_eval_accuracy_econometrics": 0.8333333333333334, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.8125, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.7073170731707317, |
|
"mmlu_eval_accuracy_formal_logic": 0.6428571428571429, |
|
"mmlu_eval_accuracy_global_facts": 0.4, |
|
"mmlu_eval_accuracy_high_school_biology": 0.8125, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7674418604651163, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.3448275862068966, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9615384615384616, |
|
"mmlu_eval_accuracy_high_school_physics": 0.35294117647058826, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.9333333333333333, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.782608695652174, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9545454545454546, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.7692307692307693, |
|
"mmlu_eval_accuracy_human_aging": 0.782608695652174, |
|
"mmlu_eval_accuracy_human_sexuality": 0.6666666666666666, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.6363636363636364, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7777777777777778, |
|
"mmlu_eval_accuracy_machine_learning": 0.6363636363636364, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.96, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7441860465116279, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.57, |
|
"mmlu_eval_accuracy_nutrition": 0.7272727272727273, |
|
"mmlu_eval_accuracy_philosophy": 0.7941176470588235, |
|
"mmlu_eval_accuracy_prehistory": 0.8285714285714286, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6129032258064516, |
|
"mmlu_eval_accuracy_professional_law": 0.6411764705882353, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8064516129032258, |
|
"mmlu_eval_accuracy_professional_psychology": 0.8260869565217391, |
|
"mmlu_eval_accuracy_public_relations": 0.6666666666666666, |
|
"mmlu_eval_accuracy_security_studies": 0.8148148148148148, |
|
"mmlu_eval_accuracy_sociology": 0.9090909090909091, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.2988067958029479, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0735, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9066, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8716, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9144, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0338, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0275, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8382, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8489, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8931, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0515, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0965, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8928, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8608, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8831, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0253, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9905, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8487, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8568, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9047, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 1.0250624418258667, |
|
"eval_runtime": 946.4035, |
|
"eval_samples_per_second": 1.057, |
|
"eval_steps_per_second": 1.057, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"mmlu_eval_accuracy": 0.7288948695878031, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727, |
|
"mmlu_eval_accuracy_anatomy": 0.5714285714285714, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.8620689655172413, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.7272727272727273, |
|
"mmlu_eval_accuracy_college_mathematics": 0.36363636363636365, |
|
"mmlu_eval_accuracy_college_medicine": 0.9090909090909091, |
|
"mmlu_eval_accuracy_college_physics": 0.6363636363636364, |
|
"mmlu_eval_accuracy_computer_security": 0.6363636363636364, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154, |
|
"mmlu_eval_accuracy_econometrics": 0.8333333333333334, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.8125, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.6097560975609756, |
|
"mmlu_eval_accuracy_formal_logic": 0.6428571428571429, |
|
"mmlu_eval_accuracy_global_facts": 0.5, |
|
"mmlu_eval_accuracy_high_school_biology": 0.84375, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.45454545454545453, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7906976744186046, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.3793103448275862, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9230769230769231, |
|
"mmlu_eval_accuracy_high_school_physics": 0.23529411764705882, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.9333333333333333, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.7391304347826086, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.7692307692307693, |
|
"mmlu_eval_accuracy_human_aging": 0.782608695652174, |
|
"mmlu_eval_accuracy_human_sexuality": 0.6666666666666666, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.5454545454545454, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7222222222222222, |
|
"mmlu_eval_accuracy_machine_learning": 0.6363636363636364, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.96, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7790697674418605, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.57, |
|
"mmlu_eval_accuracy_nutrition": 0.7575757575757576, |
|
"mmlu_eval_accuracy_philosophy": 0.8235294117647058, |
|
"mmlu_eval_accuracy_prehistory": 0.8857142857142857, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6129032258064516, |
|
"mmlu_eval_accuracy_professional_law": 0.6235294117647059, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8387096774193549, |
|
"mmlu_eval_accuracy_professional_psychology": 0.8115942028985508, |
|
"mmlu_eval_accuracy_public_relations": 0.5833333333333334, |
|
"mmlu_eval_accuracy_security_studies": 0.8148148148148148, |
|
"mmlu_eval_accuracy_sociology": 0.9090909090909091, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.243813282909306, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0174, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0302, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8799, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8447, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9053, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0331, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0412, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8753, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8744, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8899, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0053, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0127, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8023, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8349, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9742, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0971, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0728, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7724, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7675, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 1.052681565284729, |
|
"eval_runtime": 942.0722, |
|
"eval_samples_per_second": 1.061, |
|
"eval_steps_per_second": 1.061, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"mmlu_eval_accuracy": 0.7373981967098951, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365, |
|
"mmlu_eval_accuracy_anatomy": 0.6428571428571429, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.896551724137931, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.5454545454545454, |
|
"mmlu_eval_accuracy_college_mathematics": 0.36363636363636365, |
|
"mmlu_eval_accuracy_college_medicine": 0.9090909090909091, |
|
"mmlu_eval_accuracy_college_physics": 0.5454545454545454, |
|
"mmlu_eval_accuracy_computer_security": 0.6363636363636364, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154, |
|
"mmlu_eval_accuracy_econometrics": 0.8333333333333334, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.875, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.5853658536585366, |
|
"mmlu_eval_accuracy_formal_logic": 0.7142857142857143, |
|
"mmlu_eval_accuracy_global_facts": 0.4, |
|
"mmlu_eval_accuracy_high_school_biology": 0.84375, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.45454545454545453, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7674418604651163, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.41379310344827586, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9230769230769231, |
|
"mmlu_eval_accuracy_high_school_physics": 0.23529411764705882, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.95, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.782608695652174, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.7692307692307693, |
|
"mmlu_eval_accuracy_human_aging": 0.8260869565217391, |
|
"mmlu_eval_accuracy_human_sexuality": 0.6666666666666666, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.6363636363636364, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7777777777777778, |
|
"mmlu_eval_accuracy_machine_learning": 0.6363636363636364, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.92, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7906976744186046, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.62, |
|
"mmlu_eval_accuracy_nutrition": 0.7575757575757576, |
|
"mmlu_eval_accuracy_philosophy": 0.8235294117647058, |
|
"mmlu_eval_accuracy_prehistory": 0.8857142857142857, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6774193548387096, |
|
"mmlu_eval_accuracy_professional_law": 0.6294117647058823, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8709677419354839, |
|
"mmlu_eval_accuracy_professional_psychology": 0.782608695652174, |
|
"mmlu_eval_accuracy_public_relations": 0.6666666666666666, |
|
"mmlu_eval_accuracy_security_studies": 0.8148148148148148, |
|
"mmlu_eval_accuracy_sociology": 0.9090909090909091, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.2340081441760609, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7194, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8236, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6652, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7177, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7788, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8117, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8145, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6984, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7011, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.769, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7705, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8066, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6622, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6641, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7239, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7618, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7845, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.0001, |
|
"loss": 0.719, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 1.1104822158813477, |
|
"eval_runtime": 948.1299, |
|
"eval_samples_per_second": 1.055, |
|
"eval_steps_per_second": 1.055, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"mmlu_eval_accuracy": 0.7369285730399766, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365, |
|
"mmlu_eval_accuracy_anatomy": 0.6428571428571429, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 1.0, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.8275862068965517, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.6363636363636364, |
|
"mmlu_eval_accuracy_college_mathematics": 0.45454545454545453, |
|
"mmlu_eval_accuracy_college_medicine": 0.8636363636363636, |
|
"mmlu_eval_accuracy_college_physics": 0.6363636363636364, |
|
"mmlu_eval_accuracy_computer_security": 0.7272727272727273, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6923076923076923, |
|
"mmlu_eval_accuracy_econometrics": 0.75, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.8125, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.6341463414634146, |
|
"mmlu_eval_accuracy_formal_logic": 0.7857142857142857, |
|
"mmlu_eval_accuracy_global_facts": 0.5, |
|
"mmlu_eval_accuracy_high_school_biology": 0.84375, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.8888888888888888, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7906976744186046, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.3793103448275862, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9230769230769231, |
|
"mmlu_eval_accuracy_high_school_physics": 0.17647058823529413, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.95, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.6956521739130435, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.7307692307692307, |
|
"mmlu_eval_accuracy_human_aging": 0.7391304347826086, |
|
"mmlu_eval_accuracy_human_sexuality": 0.6666666666666666, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.5454545454545454, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7222222222222222, |
|
"mmlu_eval_accuracy_machine_learning": 0.6363636363636364, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.88, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7906976744186046, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.57, |
|
"mmlu_eval_accuracy_nutrition": 0.7575757575757576, |
|
"mmlu_eval_accuracy_philosophy": 0.8529411764705882, |
|
"mmlu_eval_accuracy_prehistory": 0.8571428571428571, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6774193548387096, |
|
"mmlu_eval_accuracy_professional_law": 0.6058823529411764, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8709677419354839, |
|
"mmlu_eval_accuracy_professional_psychology": 0.7681159420289855, |
|
"mmlu_eval_accuracy_public_relations": 0.5833333333333334, |
|
"mmlu_eval_accuracy_security_studies": 0.8148148148148148, |
|
"mmlu_eval_accuracy_sociology": 0.9090909090909091, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5555555555555556, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.0866562834095908, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7093, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7684, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7501, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8043, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6927, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7278, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8095, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7463, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7707, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7152, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.687, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7529, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7565, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8066, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7623, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6947, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7756, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8453, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8306, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 1.100826621055603, |
|
"eval_runtime": 940.4488, |
|
"eval_samples_per_second": 1.063, |
|
"eval_steps_per_second": 1.063, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"mmlu_eval_accuracy": 0.7363077307176445, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365, |
|
"mmlu_eval_accuracy_anatomy": 0.5714285714285714, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.8620689655172413, |
|
"mmlu_eval_accuracy_college_biology": 0.8125, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.5454545454545454, |
|
"mmlu_eval_accuracy_college_mathematics": 0.36363636363636365, |
|
"mmlu_eval_accuracy_college_medicine": 0.9090909090909091, |
|
"mmlu_eval_accuracy_college_physics": 0.6363636363636364, |
|
"mmlu_eval_accuracy_computer_security": 0.6363636363636364, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154, |
|
"mmlu_eval_accuracy_econometrics": 0.75, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.8125, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.6585365853658537, |
|
"mmlu_eval_accuracy_formal_logic": 0.6428571428571429, |
|
"mmlu_eval_accuracy_global_facts": 0.6, |
|
"mmlu_eval_accuracy_high_school_biology": 0.78125, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.5909090909090909, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9545454545454546, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7906976744186046, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.4827586206896552, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9615384615384616, |
|
"mmlu_eval_accuracy_high_school_physics": 0.17647058823529413, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.9166666666666666, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.6521739130434783, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.8076923076923077, |
|
"mmlu_eval_accuracy_human_aging": 0.7391304347826086, |
|
"mmlu_eval_accuracy_human_sexuality": 0.75, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.45454545454545453, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7777777777777778, |
|
"mmlu_eval_accuracy_machine_learning": 0.6363636363636364, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.88, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7906976744186046, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.64, |
|
"mmlu_eval_accuracy_nutrition": 0.7575757575757576, |
|
"mmlu_eval_accuracy_philosophy": 0.8235294117647058, |
|
"mmlu_eval_accuracy_prehistory": 0.8571428571428571, |
|
"mmlu_eval_accuracy_professional_accounting": 0.7096774193548387, |
|
"mmlu_eval_accuracy_professional_law": 0.6176470588235294, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8709677419354839, |
|
"mmlu_eval_accuracy_professional_psychology": 0.7971014492753623, |
|
"mmlu_eval_accuracy_public_relations": 0.6666666666666666, |
|
"mmlu_eval_accuracy_security_studies": 0.8518518518518519, |
|
"mmlu_eval_accuracy_sociology": 0.9090909090909091, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5555555555555556, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.2313211129857853, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6937, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6997, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7588, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7731, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7914, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7175, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7046, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7597, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7932, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8059, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7258, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7486, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7233, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7945, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8324, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7294, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6117, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6464, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6156, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 1.1478718519210815, |
|
"eval_runtime": 932.4225, |
|
"eval_samples_per_second": 1.072, |
|
"eval_steps_per_second": 1.072, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"mmlu_eval_accuracy": 0.745366643285036, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727, |
|
"mmlu_eval_accuracy_anatomy": 0.5714285714285714, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 1.0, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.8620689655172413, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.7272727272727273, |
|
"mmlu_eval_accuracy_college_mathematics": 0.36363636363636365, |
|
"mmlu_eval_accuracy_college_medicine": 0.9090909090909091, |
|
"mmlu_eval_accuracy_college_physics": 0.6363636363636364, |
|
"mmlu_eval_accuracy_computer_security": 0.6363636363636364, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6538461538461539, |
|
"mmlu_eval_accuracy_econometrics": 0.75, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.875, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.7073170731707317, |
|
"mmlu_eval_accuracy_formal_logic": 0.7857142857142857, |
|
"mmlu_eval_accuracy_global_facts": 0.8, |
|
"mmlu_eval_accuracy_high_school_biology": 0.8125, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.5454545454545454, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7674418604651163, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.3793103448275862, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9230769230769231, |
|
"mmlu_eval_accuracy_high_school_physics": 0.23529411764705882, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.9333333333333333, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.6521739130434783, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.8076923076923077, |
|
"mmlu_eval_accuracy_human_aging": 0.7391304347826086, |
|
"mmlu_eval_accuracy_human_sexuality": 0.75, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.5454545454545454, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7777777777777778, |
|
"mmlu_eval_accuracy_machine_learning": 0.6363636363636364, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.96, |
|
"mmlu_eval_accuracy_medical_genetics": 1.0, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7906976744186046, |
|
"mmlu_eval_accuracy_moral_disputes": 0.7894736842105263, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.61, |
|
"mmlu_eval_accuracy_nutrition": 0.7272727272727273, |
|
"mmlu_eval_accuracy_philosophy": 0.7647058823529411, |
|
"mmlu_eval_accuracy_prehistory": 0.8571428571428571, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6774193548387096, |
|
"mmlu_eval_accuracy_professional_law": 0.6176470588235294, |
|
"mmlu_eval_accuracy_professional_medicine": 0.9032258064516129, |
|
"mmlu_eval_accuracy_professional_psychology": 0.7391304347826086, |
|
"mmlu_eval_accuracy_public_relations": 0.6666666666666666, |
|
"mmlu_eval_accuracy_security_studies": 0.7777777777777778, |
|
"mmlu_eval_accuracy_sociology": 0.9090909090909091, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.4050485734687297, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5237, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3516, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4976, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6535, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5926, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5476, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.0001, |
|
"loss": 0.368, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5043, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5907, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5609, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5272, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3672, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4947, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6441, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5989, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5411, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0001, |
|
"loss": 0.401, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4685, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6234, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 1.1522600650787354, |
|
"eval_runtime": 572.6447, |
|
"eval_samples_per_second": 1.746, |
|
"eval_steps_per_second": 1.746, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"mmlu_eval_accuracy": 0.7349633316353468, |
|
"mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365, |
|
"mmlu_eval_accuracy_anatomy": 0.5714285714285714, |
|
"mmlu_eval_accuracy_astronomy": 0.6875, |
|
"mmlu_eval_accuracy_business_ethics": 1.0, |
|
"mmlu_eval_accuracy_clinical_knowledge": 0.8620689655172413, |
|
"mmlu_eval_accuracy_college_biology": 0.875, |
|
"mmlu_eval_accuracy_college_chemistry": 0.5, |
|
"mmlu_eval_accuracy_college_computer_science": 0.6363636363636364, |
|
"mmlu_eval_accuracy_college_mathematics": 0.2727272727272727, |
|
"mmlu_eval_accuracy_college_medicine": 0.8636363636363636, |
|
"mmlu_eval_accuracy_college_physics": 0.6363636363636364, |
|
"mmlu_eval_accuracy_computer_security": 0.7272727272727273, |
|
"mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154, |
|
"mmlu_eval_accuracy_econometrics": 0.75, |
|
"mmlu_eval_accuracy_electrical_engineering": 0.75, |
|
"mmlu_eval_accuracy_elementary_mathematics": 0.6341463414634146, |
|
"mmlu_eval_accuracy_formal_logic": 0.7142857142857143, |
|
"mmlu_eval_accuracy_global_facts": 0.6, |
|
"mmlu_eval_accuracy_high_school_biology": 0.8125, |
|
"mmlu_eval_accuracy_high_school_chemistry": 0.45454545454545453, |
|
"mmlu_eval_accuracy_high_school_computer_science": 0.8888888888888888, |
|
"mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778, |
|
"mmlu_eval_accuracy_high_school_geography": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_government_and_politics": 0.9523809523809523, |
|
"mmlu_eval_accuracy_high_school_macroeconomics": 0.7674418604651163, |
|
"mmlu_eval_accuracy_high_school_mathematics": 0.4482758620689655, |
|
"mmlu_eval_accuracy_high_school_microeconomics": 0.9615384615384616, |
|
"mmlu_eval_accuracy_high_school_physics": 0.17647058823529413, |
|
"mmlu_eval_accuracy_high_school_psychology": 0.9333333333333333, |
|
"mmlu_eval_accuracy_high_school_statistics": 0.6521739130434783, |
|
"mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091, |
|
"mmlu_eval_accuracy_high_school_world_history": 0.8076923076923077, |
|
"mmlu_eval_accuracy_human_aging": 0.7391304347826086, |
|
"mmlu_eval_accuracy_human_sexuality": 0.75, |
|
"mmlu_eval_accuracy_international_law": 1.0, |
|
"mmlu_eval_accuracy_jurisprudence": 0.5454545454545454, |
|
"mmlu_eval_accuracy_logical_fallacies": 0.7777777777777778, |
|
"mmlu_eval_accuracy_machine_learning": 0.6363636363636364, |
|
"mmlu_eval_accuracy_management": 0.9090909090909091, |
|
"mmlu_eval_accuracy_marketing": 0.84, |
|
"mmlu_eval_accuracy_medical_genetics": 0.9090909090909091, |
|
"mmlu_eval_accuracy_miscellaneous": 0.7906976744186046, |
|
"mmlu_eval_accuracy_moral_disputes": 0.8157894736842105, |
|
"mmlu_eval_accuracy_moral_scenarios": 0.64, |
|
"mmlu_eval_accuracy_nutrition": 0.7575757575757576, |
|
"mmlu_eval_accuracy_philosophy": 0.7941176470588235, |
|
"mmlu_eval_accuracy_prehistory": 0.8857142857142857, |
|
"mmlu_eval_accuracy_professional_accounting": 0.6451612903225806, |
|
"mmlu_eval_accuracy_professional_law": 0.6176470588235294, |
|
"mmlu_eval_accuracy_professional_medicine": 0.8709677419354839, |
|
"mmlu_eval_accuracy_professional_psychology": 0.7246376811594203, |
|
"mmlu_eval_accuracy_public_relations": 0.6666666666666666, |
|
"mmlu_eval_accuracy_security_studies": 0.8148148148148148, |
|
"mmlu_eval_accuracy_sociology": 0.9090909090909091, |
|
"mmlu_eval_accuracy_us_foreign_policy": 1.0, |
|
"mmlu_eval_accuracy_virology": 0.5, |
|
"mmlu_eval_accuracy_world_religions": 0.8421052631578947, |
|
"mmlu_loss": 1.221846800616253, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"step": 1875, |
|
"total_flos": 2.1784431229955113e+18, |
|
"train_loss": 0.8699908837636312, |
|
"train_runtime": 112632.6276, |
|
"train_samples_per_second": 0.266, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"max_steps": 1875, |
|
"num_train_epochs": 4, |
|
"total_flos": 2.1784431229955113e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |