|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3287175905000616, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4084967333570947e-06, |
|
"loss": 2.5231, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.0507482022971233e-06, |
|
"loss": 2.3436, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.385606273598312e-06, |
|
"loss": 2.22, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6136695401116585e-06, |
|
"loss": 2.1055, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.7868297632261957e-06, |
|
"loss": 2.1275, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.926458092787486e-06, |
|
"loss": 2.0425, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.0434580045013773e-06, |
|
"loss": 2.0407, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.1441512086208035e-06, |
|
"loss": 2.0558, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.232532087697698e-06, |
|
"loss": 1.9887, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.3112862237770753e-06, |
|
"loss": 1.9845, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.3823062961420163e-06, |
|
"loss": 1.9856, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.446976436243603e-06, |
|
"loss": 1.9968, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.506339534926595e-06, |
|
"loss": 1.9247, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.5612009452606784e-06, |
|
"loss": 1.9817, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.612195557913627e-06, |
|
"loss": 1.9644, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.65983275401539e-06, |
|
"loss": 1.9639, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7045274519126395e-06, |
|
"loss": 1.9587, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7466221106030114e-06, |
|
"loss": 1.9849, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.786402677560832e-06, |
|
"loss": 1.9745, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.824110376935989e-06, |
|
"loss": 1.9429, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.8599505757615295e-06, |
|
"loss": 1.9484, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.894099556414216e-06, |
|
"loss": 1.9214, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9267097619885385e-06, |
|
"loss": 1.9274, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.95791391001684e-06, |
|
"loss": 1.9185, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.987828255432777e-06, |
|
"loss": 1.9578, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.016555205552159e-06, |
|
"loss": 1.907, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.044185435607626e-06, |
|
"loss": 1.9448, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.070799615107415e-06, |
|
"loss": 1.8884, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.096469827889988e-06, |
|
"loss": 1.9402, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.121260748862021e-06, |
|
"loss": 1.9153, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.145230625795312e-06, |
|
"loss": 1.9106, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.1684321036962525e-06, |
|
"loss": 1.958, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.190912921100477e-06, |
|
"loss": 1.9117, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.212716501452232e-06, |
|
"loss": 1.9097, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.233882457984791e-06, |
|
"loss": 1.9279, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.2544470268536555e-06, |
|
"loss": 1.9164, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.27444344042015e-06, |
|
"loss": 1.9323, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.293902250342989e-06, |
|
"loss": 1.9134, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.312851608364853e-06, |
|
"loss": 1.8835, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.3313175112718595e-06, |
|
"loss": 1.8969, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.3493240153753665e-06, |
|
"loss": 1.9238, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.366893424956263e-06, |
|
"loss": 1.8946, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.38404645837504e-06, |
|
"loss": 1.8781, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.400802394950703e-06, |
|
"loss": 1.8955, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.4171792052198945e-06, |
|
"loss": 1.8515, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.433193666783084e-06, |
|
"loss": 1.8978, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.448861467610187e-06, |
|
"loss": 1.889, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.4641972984001906e-06, |
|
"loss": 1.8667, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.479214935357724e-06, |
|
"loss": 1.9304, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.493927314555554e-06, |
|
"loss": 1.9042, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_gsm8k_hard_accuracy": 0.8928993119172672, |
|
"eval_gsm8k_hard_loss": 0.4951171875, |
|
"eval_gsm8k_hard_runtime": 2.1138, |
|
"eval_gsm8k_hard_samples_per_second": 124.893, |
|
"eval_gsm8k_hard_steps_per_second": 8.042, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_webgpt_accuracy": 0.4654303097674511, |
|
"eval_webgpt_loss": 2.478515625, |
|
"eval_webgpt_runtime": 13.6298, |
|
"eval_webgpt_samples_per_second": 287.312, |
|
"eval_webgpt_steps_per_second": 17.975, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_squad_v2_accuracy": 0.8681394546082154, |
|
"eval_squad_v2_loss": 0.51806640625, |
|
"eval_squad_v2_runtime": 80.2931, |
|
"eval_squad_v2_samples_per_second": 324.611, |
|
"eval_squad_v2_steps_per_second": 20.288, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_adversarial_qa_accuracy": 0.7833800465144016, |
|
"eval_adversarial_qa_loss": 1.310546875, |
|
"eval_adversarial_qa_runtime": 19.1554, |
|
"eval_adversarial_qa_samples_per_second": 313.228, |
|
"eval_adversarial_qa_steps_per_second": 19.577, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_private_tuning_accuracy": 0.6404945703123248, |
|
"eval_private_tuning_loss": 1.3779296875, |
|
"eval_private_tuning_runtime": 68.286, |
|
"eval_private_tuning_samples_per_second": 310.137, |
|
"eval_private_tuning_steps_per_second": 19.389, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_oa_translated_accuracy": 0.6488286598439107, |
|
"eval_oa_translated_loss": 1.5576171875, |
|
"eval_oa_translated_runtime": 524.5762, |
|
"eval_oa_translated_samples_per_second": 254.918, |
|
"eval_oa_translated_steps_per_second": 15.933, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_prosocial_dialogue_accuracy": 0.52144098641849, |
|
"eval_prosocial_dialogue_loss": 1.90625, |
|
"eval_prosocial_dialogue_runtime": 90.5414, |
|
"eval_prosocial_dialogue_samples_per_second": 298.018, |
|
"eval_prosocial_dialogue_steps_per_second": 18.632, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_math_qa_accuracy": 0.5165153098127461, |
|
"eval_math_qa_loss": 2.20703125, |
|
"eval_math_qa_runtime": 17.7049, |
|
"eval_math_qa_samples_per_second": 337.082, |
|
"eval_math_qa_steps_per_second": 21.068, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_wikihow_accuracy": 0.5831415499792042, |
|
"eval_wikihow_loss": 2.140625, |
|
"eval_wikihow_runtime": 9.4415, |
|
"eval_wikihow_samples_per_second": 242.863, |
|
"eval_wikihow_steps_per_second": 15.252, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_joke_accuracy": 0.45545868081880214, |
|
"eval_joke_loss": 2.529296875, |
|
"eval_joke_runtime": 1.3918, |
|
"eval_joke_samples_per_second": 54.606, |
|
"eval_joke_steps_per_second": 3.593, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_gsm8k_accuracy": 0.7111711283077483, |
|
"eval_gsm8k_loss": 1.1416015625, |
|
"eval_gsm8k_runtime": 6.0874, |
|
"eval_gsm8k_samples_per_second": 245.588, |
|
"eval_gsm8k_steps_per_second": 15.442, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ted_trans_en-hi_accuracy": 0.5343350158469304, |
|
"eval_ted_trans_en-hi_loss": 2.244140625, |
|
"eval_ted_trans_en-hi_runtime": 1.2611, |
|
"eval_ted_trans_en-hi_samples_per_second": 81.672, |
|
"eval_ted_trans_en-hi_steps_per_second": 5.551, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ted_trans_de-ja_accuracy": 0.5195722742027878, |
|
"eval_ted_trans_de-ja_loss": 2.314453125, |
|
"eval_ted_trans_de-ja_runtime": 3.7052, |
|
"eval_ted_trans_de-ja_samples_per_second": 193.783, |
|
"eval_ted_trans_de-ja_steps_per_second": 12.145, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ted_trans_nl-en_accuracy": 0.6433630400125447, |
|
"eval_ted_trans_nl-en_loss": 1.7353515625, |
|
"eval_ted_trans_nl-en_runtime": 3.5777, |
|
"eval_ted_trans_nl-en_samples_per_second": 215.5, |
|
"eval_ted_trans_nl-en_steps_per_second": 13.696, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ted_trans_en-ja_accuracy": 0.5440905817396176, |
|
"eval_ted_trans_en-ja_loss": 2.13671875, |
|
"eval_ted_trans_en-ja_runtime": 3.7065, |
|
"eval_ted_trans_en-ja_samples_per_second": 216.109, |
|
"eval_ted_trans_en-ja_steps_per_second": 13.76, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ted_trans_en-es_accuracy": 0.7055326931870142, |
|
"eval_ted_trans_en-es_loss": 1.3369140625, |
|
"eval_ted_trans_en-es_runtime": 3.3519, |
|
"eval_ted_trans_en-es_samples_per_second": 246.427, |
|
"eval_ted_trans_en-es_steps_per_second": 15.514, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ted_trans_en-ms_accuracy": 0.5517070757050965, |
|
"eval_ted_trans_en-ms_loss": 2.32421875, |
|
"eval_ted_trans_en-ms_runtime": 0.8373, |
|
"eval_ted_trans_en-ms_samples_per_second": 50.159, |
|
"eval_ted_trans_en-ms_steps_per_second": 3.583, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_xsum_accuracy": 0.5663549372439451, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 140.8411, |
|
"eval_xsum_samples_per_second": 289.752, |
|
"eval_xsum_steps_per_second": 18.113, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_cnn_dailymail_accuracy": 0.6501961820900207, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 207.5377, |
|
"eval_cnn_dailymail_samples_per_second": 276.687, |
|
"eval_cnn_dailymail_steps_per_second": 17.293, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_multi_news_accuracy": 0.5168385769568282, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 36.1577, |
|
"eval_multi_news_samples_per_second": 248.771, |
|
"eval_multi_news_steps_per_second": 15.571, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_tldr_news_accuracy": 0.5048904354368475, |
|
"eval_tldr_news_loss": 2.384765625, |
|
"eval_tldr_news_runtime": 5.9032, |
|
"eval_tldr_news_samples_per_second": 241.901, |
|
"eval_tldr_news_steps_per_second": 15.246, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_scitldr_accuracy": 0.5, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.3974, |
|
"eval_scitldr_samples_per_second": 166.428, |
|
"eval_scitldr_steps_per_second": 10.428, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_samsum_accuracy": 0.5789020336200051, |
|
"eval_samsum_loss": 1.619140625, |
|
"eval_samsum_runtime": 10.1073, |
|
"eval_samsum_samples_per_second": 291.572, |
|
"eval_samsum_steps_per_second": 18.304, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_debate_sum_accuracy": 0.9321960723793357, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 188.2954, |
|
"eval_debate_sum_samples_per_second": 255.524, |
|
"eval_debate_sum_steps_per_second": 15.975, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_billsum_accuracy": 0.6453599014888616, |
|
"eval_billsum_loss": NaN, |
|
"eval_billsum_runtime": 22.3683, |
|
"eval_billsum_samples_per_second": 169.436, |
|
"eval_billsum_steps_per_second": 10.595, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_wmt2019_zh-en_accuracy": 0.5524590644131345, |
|
"eval_wmt2019_zh-en_loss": 2.146484375, |
|
"eval_wmt2019_zh-en_runtime": 13.8635, |
|
"eval_wmt2019_zh-en_samples_per_second": 287.158, |
|
"eval_wmt2019_zh-en_steps_per_second": 17.961, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_wmt2019_ru-en_accuracy": 0.6308636370293347, |
|
"eval_wmt2019_ru-en_loss": 1.580078125, |
|
"eval_wmt2019_ru-en_runtime": 11.2038, |
|
"eval_wmt2019_ru-en_samples_per_second": 267.766, |
|
"eval_wmt2019_ru-en_steps_per_second": 16.78, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_wmt2019_de-en_accuracy": 0.657534107930853, |
|
"eval_wmt2019_de-en_loss": 1.501953125, |
|
"eval_wmt2019_de-en_runtime": 9.454, |
|
"eval_wmt2019_de-en_samples_per_second": 317.115, |
|
"eval_wmt2019_de-en_steps_per_second": 19.886, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_wmt2019_fr-de_accuracy": 0.6479142094481346, |
|
"eval_wmt2019_fr-de_loss": 1.5439453125, |
|
"eval_wmt2019_fr-de_runtime": 5.2625, |
|
"eval_wmt2019_fr-de_samples_per_second": 287.315, |
|
"eval_wmt2019_fr-de_steps_per_second": 18.052, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_essay_instruction_accuracy": 0.5775154438520775, |
|
"eval_essay_instruction_loss": 2.09765625, |
|
"eval_essay_instruction_runtime": 3.0793, |
|
"eval_essay_instruction_samples_per_second": 134.122, |
|
"eval_essay_instruction_steps_per_second": 8.443, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_reddit_eli5_accuracy": 0.42260973997710893, |
|
"eval_reddit_eli5_loss": 2.76171875, |
|
"eval_reddit_eli5_runtime": 203.2121, |
|
"eval_reddit_eli5_samples_per_second": 268.326, |
|
"eval_reddit_eli5_steps_per_second": 16.771, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_reddit_askh_accuracy": 0.42568767737796204, |
|
"eval_reddit_askh_loss": 2.84375, |
|
"eval_reddit_askh_runtime": 111.1784, |
|
"eval_reddit_askh_samples_per_second": 177.238, |
|
"eval_reddit_askh_steps_per_second": 11.081, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_reddit_asks_accuracy": 0.43555163138333913, |
|
"eval_reddit_asks_loss": 2.689453125, |
|
"eval_reddit_asks_runtime": 119.9403, |
|
"eval_reddit_asks_samples_per_second": 219.743, |
|
"eval_reddit_asks_steps_per_second": 13.74, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5083465988888945e-06, |
|
"loss": 1.8966, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5224842384899045e-06, |
|
"loss": 1.9039, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5363510253542444e-06, |
|
"loss": 1.9029, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.549957142832593e-06, |
|
"loss": 1.8759, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.563312210555719e-06, |
|
"loss": 1.9042, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.576425325289549e-06, |
|
"loss": 1.9205, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.589305098154845e-06, |
|
"loss": 1.9324, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.601959688592886e-06, |
|
"loss": 1.8757, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.614396835412691e-06, |
|
"loss": 1.895, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.626623885215616e-06, |
|
"loss": 1.9004, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.638647818458763e-06, |
|
"loss": 1.8705, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.650475273388737e-06, |
|
"loss": 1.8929, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.662112568051194e-06, |
|
"loss": 1.8745, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.673565720558918e-06, |
|
"loss": 1.8768, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.6848404677811685e-06, |
|
"loss": 1.885, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.695942282599635e-06, |
|
"loss": 1.8496, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.706876389860915e-06, |
|
"loss": 1.9061, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.717647781141908e-06, |
|
"loss": 1.8839, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.7282612284325845e-06, |
|
"loss": 1.921, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.738721296830016e-06, |
|
"loss": 1.8519, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.749032356328167e-06, |
|
"loss": 1.8901, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.759198592779668e-06, |
|
"loss": 1.8678, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.769224018098397e-06, |
|
"loss": 1.859, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7791124797650865e-06, |
|
"loss": 1.8315, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.788867669692332e-06, |
|
"loss": 1.8915, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.798493132500121e-06, |
|
"loss": 1.8936, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8079922732483016e-06, |
|
"loss": 1.8869, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.817368364668191e-06, |
|
"loss": 1.8556, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8266245539317745e-06, |
|
"loss": 1.8594, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.835763868993521e-06, |
|
"loss": 1.8646, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.844789224536785e-06, |
|
"loss": 1.8758, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.853703427554027e-06, |
|
"loss": 1.8349, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.862509182587578e-06, |
|
"loss": 1.8517, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.871209096655434e-06, |
|
"loss": 1.8563, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.879805683884512e-06, |
|
"loss": 1.8749, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.888301369871998e-06, |
|
"loss": 1.8276, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.8966984957936845e-06, |
|
"loss": 1.7967, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.904999322276735e-06, |
|
"loss": 1.9041, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.913206033052878e-06, |
|
"loss": 1.8417, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.921320738406821e-06, |
|
"loss": 1.8611, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.929345478433492e-06, |
|
"loss": 1.8924, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.937282226116702e-06, |
|
"loss": 1.8684, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.945132890240829e-06, |
|
"loss": 1.8292, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.952899318146298e-06, |
|
"loss": 1.8498, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.96058329833879e-06, |
|
"loss": 1.8944, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.968186562961406e-06, |
|
"loss": 1.885, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.975710790138337e-06, |
|
"loss": 1.8469, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9831576061979556e-06, |
|
"loss": 1.8536, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.990528587782728e-06, |
|
"loss": 1.8514, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.99782526385276e-06, |
|
"loss": 1.8168, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_gsm8k_hard_accuracy": 0.9009201579740238, |
|
"eval_gsm8k_hard_loss": 0.44189453125, |
|
"eval_gsm8k_hard_runtime": 1.5125, |
|
"eval_gsm8k_hard_samples_per_second": 174.543, |
|
"eval_gsm8k_hard_steps_per_second": 11.24, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_webgpt_accuracy": 0.4676998865211623, |
|
"eval_webgpt_loss": 2.44921875, |
|
"eval_webgpt_runtime": 15.1394, |
|
"eval_webgpt_samples_per_second": 258.664, |
|
"eval_webgpt_steps_per_second": 16.183, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_squad_v2_accuracy": 0.8740146386480032, |
|
"eval_squad_v2_loss": 0.456298828125, |
|
"eval_squad_v2_runtime": 77.6449, |
|
"eval_squad_v2_samples_per_second": 335.682, |
|
"eval_squad_v2_steps_per_second": 20.98, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_adversarial_qa_accuracy": 0.7841552865406405, |
|
"eval_adversarial_qa_loss": 1.16796875, |
|
"eval_adversarial_qa_runtime": 20.3196, |
|
"eval_adversarial_qa_samples_per_second": 295.282, |
|
"eval_adversarial_qa_steps_per_second": 18.455, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_private_tuning_accuracy": 0.6468452789452516, |
|
"eval_private_tuning_loss": 1.33203125, |
|
"eval_private_tuning_runtime": 62.217, |
|
"eval_private_tuning_samples_per_second": 340.389, |
|
"eval_private_tuning_steps_per_second": 21.28, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_oa_translated_accuracy": 0.6605713712776647, |
|
"eval_oa_translated_loss": 1.4912109375, |
|
"eval_oa_translated_runtime": 498.0305, |
|
"eval_oa_translated_samples_per_second": 268.506, |
|
"eval_oa_translated_steps_per_second": 16.782, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_prosocial_dialogue_accuracy": 0.5267998067934173, |
|
"eval_prosocial_dialogue_loss": 1.9033203125, |
|
"eval_prosocial_dialogue_runtime": 126.2272, |
|
"eval_prosocial_dialogue_samples_per_second": 213.765, |
|
"eval_prosocial_dialogue_steps_per_second": 13.365, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_math_qa_accuracy": 0.5343074095293895, |
|
"eval_math_qa_loss": 2.080078125, |
|
"eval_math_qa_runtime": 19.3631, |
|
"eval_math_qa_samples_per_second": 308.215, |
|
"eval_math_qa_steps_per_second": 19.263, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_wikihow_accuracy": 0.5909261056425897, |
|
"eval_wikihow_loss": 2.078125, |
|
"eval_wikihow_runtime": 7.7313, |
|
"eval_wikihow_samples_per_second": 296.588, |
|
"eval_wikihow_steps_per_second": 18.626, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_joke_accuracy": 0.45830174374526156, |
|
"eval_joke_loss": 2.498046875, |
|
"eval_joke_runtime": 2.2389, |
|
"eval_joke_samples_per_second": 33.945, |
|
"eval_joke_steps_per_second": 2.233, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_gsm8k_accuracy": 0.7258224765956256, |
|
"eval_gsm8k_loss": 1.0576171875, |
|
"eval_gsm8k_runtime": 6.1435, |
|
"eval_gsm8k_samples_per_second": 243.346, |
|
"eval_gsm8k_steps_per_second": 15.301, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ted_trans_en-hi_accuracy": 0.5460148777895856, |
|
"eval_ted_trans_en-hi_loss": 2.138671875, |
|
"eval_ted_trans_en-hi_runtime": 0.5653, |
|
"eval_ted_trans_en-hi_samples_per_second": 182.218, |
|
"eval_ted_trans_en-hi_steps_per_second": 12.384, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ted_trans_de-ja_accuracy": 0.5330968145857443, |
|
"eval_ted_trans_de-ja_loss": 2.193359375, |
|
"eval_ted_trans_de-ja_runtime": 3.858, |
|
"eval_ted_trans_de-ja_samples_per_second": 186.106, |
|
"eval_ted_trans_de-ja_steps_per_second": 11.664, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ted_trans_nl-en_accuracy": 0.6414471117584333, |
|
"eval_ted_trans_nl-en_loss": 1.689453125, |
|
"eval_ted_trans_nl-en_runtime": 3.1044, |
|
"eval_ted_trans_nl-en_samples_per_second": 248.36, |
|
"eval_ted_trans_nl-en_steps_per_second": 15.784, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ted_trans_en-ja_accuracy": 0.5530009680542111, |
|
"eval_ted_trans_en-ja_loss": 2.05859375, |
|
"eval_ted_trans_en-ja_runtime": 3.8822, |
|
"eval_ted_trans_en-ja_samples_per_second": 206.324, |
|
"eval_ted_trans_en-ja_steps_per_second": 13.137, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ted_trans_en-es_accuracy": 0.7106248418922337, |
|
"eval_ted_trans_en-es_loss": 1.2880859375, |
|
"eval_ted_trans_en-es_runtime": 2.9435, |
|
"eval_ted_trans_en-es_samples_per_second": 280.621, |
|
"eval_ted_trans_en-es_steps_per_second": 17.666, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ted_trans_en-ms_accuracy": 0.5987135081642752, |
|
"eval_ted_trans_en-ms_loss": 1.984375, |
|
"eval_ted_trans_en-ms_runtime": 1.4559, |
|
"eval_ted_trans_en-ms_samples_per_second": 28.848, |
|
"eval_ted_trans_en-ms_steps_per_second": 2.061, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_xsum_accuracy": 0.5722584941442159, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 141.7203, |
|
"eval_xsum_samples_per_second": 287.955, |
|
"eval_xsum_steps_per_second": 18.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_cnn_dailymail_accuracy": 0.6576155822271417, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 209.5351, |
|
"eval_cnn_dailymail_samples_per_second": 274.05, |
|
"eval_cnn_dailymail_steps_per_second": 17.128, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_multi_news_accuracy": 0.5226291863275405, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 36.8798, |
|
"eval_multi_news_samples_per_second": 243.9, |
|
"eval_multi_news_steps_per_second": 15.266, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_tldr_news_accuracy": 0.5359729145114267, |
|
"eval_tldr_news_loss": 2.20703125, |
|
"eval_tldr_news_runtime": 4.9335, |
|
"eval_tldr_news_samples_per_second": 289.451, |
|
"eval_tldr_news_steps_per_second": 18.243, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_scitldr_accuracy": 0.49700598802395207, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 1.5917, |
|
"eval_scitldr_samples_per_second": 250.67, |
|
"eval_scitldr_steps_per_second": 15.706, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_samsum_accuracy": 0.590799585469913, |
|
"eval_samsum_loss": 1.5537109375, |
|
"eval_samsum_runtime": 10.6642, |
|
"eval_samsum_samples_per_second": 276.345, |
|
"eval_samsum_steps_per_second": 17.348, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_debate_sum_accuracy": 0.9329163674973446, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 196.1179, |
|
"eval_debate_sum_samples_per_second": 245.332, |
|
"eval_debate_sum_steps_per_second": 15.338, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_billsum_accuracy": 0.6510711811280909, |
|
"eval_billsum_loss": NaN, |
|
"eval_billsum_runtime": 16.6536, |
|
"eval_billsum_samples_per_second": 227.579, |
|
"eval_billsum_steps_per_second": 14.231, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_wmt2019_zh-en_accuracy": 0.5587294145226513, |
|
"eval_wmt2019_zh-en_loss": 2.111328125, |
|
"eval_wmt2019_zh-en_runtime": 12.8767, |
|
"eval_wmt2019_zh-en_samples_per_second": 309.164, |
|
"eval_wmt2019_zh-en_steps_per_second": 19.337, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_wmt2019_ru-en_accuracy": 0.6366095054310483, |
|
"eval_wmt2019_ru-en_loss": 1.552734375, |
|
"eval_wmt2019_ru-en_runtime": 10.1123, |
|
"eval_wmt2019_ru-en_samples_per_second": 296.667, |
|
"eval_wmt2019_ru-en_steps_per_second": 18.591, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_wmt2019_de-en_accuracy": 0.6681106028096029, |
|
"eval_wmt2019_de-en_loss": 1.4462890625, |
|
"eval_wmt2019_de-en_runtime": 9.913, |
|
"eval_wmt2019_de-en_samples_per_second": 302.432, |
|
"eval_wmt2019_de-en_steps_per_second": 18.965, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_wmt2019_fr-de_accuracy": 0.6540822971254923, |
|
"eval_wmt2019_fr-de_loss": 1.51171875, |
|
"eval_wmt2019_fr-de_runtime": 5.7364, |
|
"eval_wmt2019_fr-de_samples_per_second": 263.579, |
|
"eval_wmt2019_fr-de_steps_per_second": 16.561, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_essay_instruction_accuracy": 0.5807311500380807, |
|
"eval_essay_instruction_loss": 2.072265625, |
|
"eval_essay_instruction_runtime": 4.2906, |
|
"eval_essay_instruction_samples_per_second": 96.257, |
|
"eval_essay_instruction_steps_per_second": 6.06, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_reddit_eli5_accuracy": 0.42394149731958486, |
|
"eval_reddit_eli5_loss": 2.748046875, |
|
"eval_reddit_eli5_runtime": 220.7861, |
|
"eval_reddit_eli5_samples_per_second": 246.968, |
|
"eval_reddit_eli5_steps_per_second": 15.436, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_reddit_askh_accuracy": 0.42705794870139985, |
|
"eval_reddit_askh_loss": 2.826171875, |
|
"eval_reddit_askh_runtime": 106.0159, |
|
"eval_reddit_askh_samples_per_second": 185.868, |
|
"eval_reddit_askh_steps_per_second": 11.621, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_reddit_asks_accuracy": 0.43686793419350517, |
|
"eval_reddit_asks_loss": 2.67578125, |
|
"eval_reddit_asks_runtime": 110.0537, |
|
"eval_reddit_asks_samples_per_second": 239.483, |
|
"eval_reddit_asks_steps_per_second": 14.975, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.997313753581662e-06, |
|
"loss": 1.8558, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.992836676217766e-06, |
|
"loss": 1.862, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.988359598853868e-06, |
|
"loss": 1.8667, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9838825214899716e-06, |
|
"loss": 1.8597, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.979405444126075e-06, |
|
"loss": 1.8472, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.974928366762178e-06, |
|
"loss": 1.8552, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9704512893982816e-06, |
|
"loss": 1.8532, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.965974212034385e-06, |
|
"loss": 1.8235, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.961497134670487e-06, |
|
"loss": 1.861, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.957020057306591e-06, |
|
"loss": 1.7964, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.952542979942694e-06, |
|
"loss": 1.8203, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9480659025787965e-06, |
|
"loss": 1.8759, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9435888252149e-06, |
|
"loss": 1.8592, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.939111747851003e-06, |
|
"loss": 1.8487, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9346346704871065e-06, |
|
"loss": 1.8112, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.930157593123209e-06, |
|
"loss": 1.8028, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.925680515759313e-06, |
|
"loss": 1.8361, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.921203438395416e-06, |
|
"loss": 1.8625, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.916726361031519e-06, |
|
"loss": 1.8345, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.912249283667622e-06, |
|
"loss": 1.8506, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.907772206303726e-06, |
|
"loss": 1.8326, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.903295128939828e-06, |
|
"loss": 1.8399, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8988180515759315e-06, |
|
"loss": 1.8706, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.894340974212035e-06, |
|
"loss": 1.8227, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.889863896848137e-06, |
|
"loss": 1.8461, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8853868194842415e-06, |
|
"loss": 1.874, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.880909742120344e-06, |
|
"loss": 1.8178, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.876432664756447e-06, |
|
"loss": 1.8141, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.871955587392551e-06, |
|
"loss": 1.8258, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.867478510028654e-06, |
|
"loss": 1.8595, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.8630014326647565e-06, |
|
"loss": 1.8495, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.85852435530086e-06, |
|
"loss": 1.8492, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.854047277936963e-06, |
|
"loss": 1.8339, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.849570200573066e-06, |
|
"loss": 1.8218, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.84509312320917e-06, |
|
"loss": 1.8411, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.840616045845273e-06, |
|
"loss": 1.8275, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.836138968481376e-06, |
|
"loss": 1.8358, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.831661891117479e-06, |
|
"loss": 1.8395, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.827184813753582e-06, |
|
"loss": 1.8114, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.822707736389685e-06, |
|
"loss": 1.8167, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.818230659025788e-06, |
|
"loss": 1.8502, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.8137535816618915e-06, |
|
"loss": 1.8494, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.809276504297995e-06, |
|
"loss": 1.8546, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.804799426934098e-06, |
|
"loss": 1.8219, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.8003223495702015e-06, |
|
"loss": 1.856, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.795845272206304e-06, |
|
"loss": 1.8235, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.791368194842407e-06, |
|
"loss": 1.8084, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.786891117478511e-06, |
|
"loss": 1.8036, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.782414040114613e-06, |
|
"loss": 1.807, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.7779369627507165e-06, |
|
"loss": 1.8223, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_gsm8k_hard_accuracy": 0.9040755669557429, |
|
"eval_gsm8k_hard_loss": 0.418212890625, |
|
"eval_gsm8k_hard_runtime": 1.5226, |
|
"eval_gsm8k_hard_samples_per_second": 173.393, |
|
"eval_gsm8k_hard_steps_per_second": 11.165, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_webgpt_accuracy": 0.4685187206997972, |
|
"eval_webgpt_loss": 2.43359375, |
|
"eval_webgpt_runtime": 16.9148, |
|
"eval_webgpt_samples_per_second": 231.513, |
|
"eval_webgpt_steps_per_second": 14.484, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_squad_v2_accuracy": 0.8753178869062296, |
|
"eval_squad_v2_loss": 0.424560546875, |
|
"eval_squad_v2_runtime": 78.195, |
|
"eval_squad_v2_samples_per_second": 333.32, |
|
"eval_squad_v2_steps_per_second": 20.833, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_adversarial_qa_accuracy": 0.7873755143419405, |
|
"eval_adversarial_qa_loss": 1.076171875, |
|
"eval_adversarial_qa_runtime": 18.1293, |
|
"eval_adversarial_qa_samples_per_second": 330.956, |
|
"eval_adversarial_qa_steps_per_second": 20.685, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_private_tuning_accuracy": 0.6490207424086791, |
|
"eval_private_tuning_loss": 1.30859375, |
|
"eval_private_tuning_runtime": 65.2643, |
|
"eval_private_tuning_samples_per_second": 324.496, |
|
"eval_private_tuning_steps_per_second": 20.287, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_oa_translated_accuracy": 0.6676994033045951, |
|
"eval_oa_translated_loss": 1.447265625, |
|
"eval_oa_translated_runtime": 495.9674, |
|
"eval_oa_translated_samples_per_second": 269.623, |
|
"eval_oa_translated_steps_per_second": 16.852, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_prosocial_dialogue_accuracy": 0.52370774081413, |
|
"eval_prosocial_dialogue_loss": 1.802734375, |
|
"eval_prosocial_dialogue_runtime": 117.5764, |
|
"eval_prosocial_dialogue_samples_per_second": 229.493, |
|
"eval_prosocial_dialogue_steps_per_second": 14.348, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_math_qa_accuracy": 0.5447965302424216, |
|
"eval_math_qa_loss": 2.015625, |
|
"eval_math_qa_runtime": 19.5318, |
|
"eval_math_qa_samples_per_second": 305.553, |
|
"eval_math_qa_steps_per_second": 19.097, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_wikihow_accuracy": 0.5924303341189519, |
|
"eval_wikihow_loss": 2.048828125, |
|
"eval_wikihow_runtime": 8.7505, |
|
"eval_wikihow_samples_per_second": 262.042, |
|
"eval_wikihow_steps_per_second": 16.456, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_joke_accuracy": 0.46341925701288855, |
|
"eval_joke_loss": 2.439453125, |
|
"eval_joke_runtime": 0.9638, |
|
"eval_joke_samples_per_second": 78.857, |
|
"eval_joke_steps_per_second": 5.188, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_gsm8k_accuracy": 0.7337848616728005, |
|
"eval_gsm8k_loss": 1.01171875, |
|
"eval_gsm8k_runtime": 6.3369, |
|
"eval_gsm8k_samples_per_second": 235.921, |
|
"eval_gsm8k_steps_per_second": 14.834, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ted_trans_en-hi_accuracy": 0.5467649647887324, |
|
"eval_ted_trans_en-hi_loss": 2.080078125, |
|
"eval_ted_trans_en-hi_runtime": 0.6116, |
|
"eval_ted_trans_en-hi_samples_per_second": 168.402, |
|
"eval_ted_trans_en-hi_steps_per_second": 11.445, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ted_trans_de-ja_accuracy": 0.5412865271482303, |
|
"eval_ted_trans_de-ja_loss": 2.130859375, |
|
"eval_ted_trans_de-ja_runtime": 3.781, |
|
"eval_ted_trans_de-ja_samples_per_second": 189.896, |
|
"eval_ted_trans_de-ja_steps_per_second": 11.902, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ted_trans_nl-en_accuracy": 0.6488075112486671, |
|
"eval_ted_trans_nl-en_loss": 1.65625, |
|
"eval_ted_trans_nl-en_runtime": 3.2481, |
|
"eval_ted_trans_nl-en_samples_per_second": 237.369, |
|
"eval_ted_trans_nl-en_steps_per_second": 15.086, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ted_trans_en-ja_accuracy": 0.5640618403329245, |
|
"eval_ted_trans_en-ja_loss": 1.990234375, |
|
"eval_ted_trans_en-ja_runtime": 3.5175, |
|
"eval_ted_trans_en-ja_samples_per_second": 227.717, |
|
"eval_ted_trans_en-ja_steps_per_second": 14.499, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ted_trans_en-es_accuracy": 0.7159835441109249, |
|
"eval_ted_trans_en-es_loss": 1.25, |
|
"eval_ted_trans_en-es_runtime": 3.1192, |
|
"eval_ted_trans_en-es_samples_per_second": 264.814, |
|
"eval_ted_trans_en-es_steps_per_second": 16.671, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ted_trans_en-ms_accuracy": 0.5680356259277586, |
|
"eval_ted_trans_en-ms_loss": 2.125, |
|
"eval_ted_trans_en-ms_runtime": 1.4378, |
|
"eval_ted_trans_en-ms_samples_per_second": 29.212, |
|
"eval_ted_trans_en-ms_steps_per_second": 2.087, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_xsum_accuracy": 0.575791398307109, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 142.8893, |
|
"eval_xsum_samples_per_second": 285.599, |
|
"eval_xsum_steps_per_second": 17.853, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_cnn_dailymail_accuracy": 0.6578154814514997, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 210.3199, |
|
"eval_cnn_dailymail_samples_per_second": 273.027, |
|
"eval_cnn_dailymail_steps_per_second": 17.064, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_multi_news_accuracy": 0.5236211410651092, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 35.5484, |
|
"eval_multi_news_samples_per_second": 253.035, |
|
"eval_multi_news_steps_per_second": 15.838, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_tldr_news_accuracy": 0.5471644879149816, |
|
"eval_tldr_news_loss": 2.119140625, |
|
"eval_tldr_news_runtime": 4.1143, |
|
"eval_tldr_news_samples_per_second": 347.078, |
|
"eval_tldr_news_steps_per_second": 21.875, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_scitldr_accuracy": 0.49550898203592814, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.4172, |
|
"eval_scitldr_samples_per_second": 165.069, |
|
"eval_scitldr_steps_per_second": 10.343, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_samsum_accuracy": 0.5926165192931454, |
|
"eval_samsum_loss": 1.5224609375, |
|
"eval_samsum_runtime": 10.6814, |
|
"eval_samsum_samples_per_second": 275.901, |
|
"eval_samsum_steps_per_second": 17.32, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_debate_sum_accuracy": 0.9358983757089394, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 196.0638, |
|
"eval_debate_sum_samples_per_second": 245.4, |
|
"eval_debate_sum_steps_per_second": 15.342, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_billsum_accuracy": 0.653463309552768, |
|
"eval_billsum_loss": NaN, |
|
"eval_billsum_runtime": 16.6514, |
|
"eval_billsum_samples_per_second": 227.609, |
|
"eval_billsum_steps_per_second": 14.233, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_wmt2019_zh-en_accuracy": 0.5612179149240267, |
|
"eval_wmt2019_zh-en_loss": 2.091796875, |
|
"eval_wmt2019_zh-en_runtime": 13.0415, |
|
"eval_wmt2019_zh-en_samples_per_second": 305.255, |
|
"eval_wmt2019_zh-en_steps_per_second": 19.093, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_wmt2019_ru-en_accuracy": 0.6424937502741108, |
|
"eval_wmt2019_ru-en_loss": 1.5146484375, |
|
"eval_wmt2019_ru-en_runtime": 9.2157, |
|
"eval_wmt2019_ru-en_samples_per_second": 325.531, |
|
"eval_wmt2019_ru-en_steps_per_second": 20.4, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_wmt2019_de-en_accuracy": 0.6710659487470143, |
|
"eval_wmt2019_de-en_loss": 1.4248046875, |
|
"eval_wmt2019_de-en_runtime": 10.6387, |
|
"eval_wmt2019_de-en_samples_per_second": 281.801, |
|
"eval_wmt2019_de-en_steps_per_second": 17.671, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_wmt2019_fr-de_accuracy": 0.6660826692300537, |
|
"eval_wmt2019_fr-de_loss": 1.4521484375, |
|
"eval_wmt2019_fr-de_runtime": 5.6356, |
|
"eval_wmt2019_fr-de_samples_per_second": 268.295, |
|
"eval_wmt2019_fr-de_steps_per_second": 16.857, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_essay_instruction_accuracy": 0.5827727003469578, |
|
"eval_essay_instruction_loss": 2.05859375, |
|
"eval_essay_instruction_runtime": 4.1866, |
|
"eval_essay_instruction_samples_per_second": 98.649, |
|
"eval_essay_instruction_steps_per_second": 6.21, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_reddit_eli5_accuracy": 0.42421905510230506, |
|
"eval_reddit_eli5_loss": 2.73828125, |
|
"eval_reddit_eli5_runtime": 199.4145, |
|
"eval_reddit_eli5_samples_per_second": 273.436, |
|
"eval_reddit_eli5_steps_per_second": 17.09, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_reddit_askh_accuracy": 0.4276221576585814, |
|
"eval_reddit_askh_loss": 2.81640625, |
|
"eval_reddit_askh_runtime": 110.2213, |
|
"eval_reddit_askh_samples_per_second": 178.777, |
|
"eval_reddit_askh_steps_per_second": 11.178, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_reddit_asks_accuracy": 0.4372012246587393, |
|
"eval_reddit_asks_loss": 2.66796875, |
|
"eval_reddit_asks_runtime": 131.6507, |
|
"eval_reddit_asks_samples_per_second": 200.196, |
|
"eval_reddit_asks_steps_per_second": 12.518, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.77345988538682e-06, |
|
"loss": 1.8033, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.768982808022923e-06, |
|
"loss": 1.8477, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.7645057306590265e-06, |
|
"loss": 1.8417, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.76002865329513e-06, |
|
"loss": 1.7781, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.755551575931232e-06, |
|
"loss": 1.808, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.751074498567336e-06, |
|
"loss": 1.8719, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.746597421203439e-06, |
|
"loss": 1.8382, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.742120343839542e-06, |
|
"loss": 1.7991, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.737643266475645e-06, |
|
"loss": 1.809, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.733166189111748e-06, |
|
"loss": 1.8206, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.7286891117478515e-06, |
|
"loss": 1.8475, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.724212034383955e-06, |
|
"loss": 1.8342, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.719734957020058e-06, |
|
"loss": 1.8436, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.715257879656161e-06, |
|
"loss": 1.8198, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.710780802292264e-06, |
|
"loss": 1.8271, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.706303724928367e-06, |
|
"loss": 1.8584, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.701826647564471e-06, |
|
"loss": 1.8485, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.697349570200573e-06, |
|
"loss": 1.7872, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.6928724928366764e-06, |
|
"loss": 1.8026, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.68839541547278e-06, |
|
"loss": 1.8138, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.683918338108882e-06, |
|
"loss": 1.7871, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.6794412607449864e-06, |
|
"loss": 1.8459, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.67496418338109e-06, |
|
"loss": 1.8081, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.670487106017192e-06, |
|
"loss": 1.7956, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.666010028653296e-06, |
|
"loss": 1.779, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.661532951289399e-06, |
|
"loss": 1.8106, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.657055873925501e-06, |
|
"loss": 1.8225, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.652578796561605e-06, |
|
"loss": 1.8263, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.648101719197708e-06, |
|
"loss": 1.8232, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.643624641833811e-06, |
|
"loss": 1.751, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.639147564469915e-06, |
|
"loss": 1.8275, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.634670487106018e-06, |
|
"loss": 1.7738, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.6301934097421206e-06, |
|
"loss": 1.8064, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.625716332378224e-06, |
|
"loss": 1.7775, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.621239255014327e-06, |
|
"loss": 1.799, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.61676217765043e-06, |
|
"loss": 1.8349, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.612285100286533e-06, |
|
"loss": 1.7826, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.607808022922636e-06, |
|
"loss": 1.8322, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.60333094555874e-06, |
|
"loss": 1.8115, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.598853868194843e-06, |
|
"loss": 1.7811, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.594376790830946e-06, |
|
"loss": 1.8113, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.589899713467049e-06, |
|
"loss": 1.82, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.585422636103152e-06, |
|
"loss": 1.8562, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.5809455587392556e-06, |
|
"loss": 1.8197, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.576468481375359e-06, |
|
"loss": 1.8255, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.571991404011461e-06, |
|
"loss": 1.7827, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.567514326647565e-06, |
|
"loss": 1.7943, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.563037249283668e-06, |
|
"loss": 1.7536, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.558560171919771e-06, |
|
"loss": 1.783, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.554083094555875e-06, |
|
"loss": 1.7924, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_gsm8k_hard_accuracy": 0.9076584829607915, |
|
"eval_gsm8k_hard_loss": 0.39697265625, |
|
"eval_gsm8k_hard_runtime": 2.2124, |
|
"eval_gsm8k_hard_samples_per_second": 119.327, |
|
"eval_gsm8k_hard_steps_per_second": 7.684, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_webgpt_accuracy": 0.4696865932858906, |
|
"eval_webgpt_loss": 2.423828125, |
|
"eval_webgpt_runtime": 14.9073, |
|
"eval_webgpt_samples_per_second": 262.691, |
|
"eval_webgpt_steps_per_second": 16.435, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_squad_v2_accuracy": 0.8851732615724923, |
|
"eval_squad_v2_loss": 0.40966796875, |
|
"eval_squad_v2_runtime": 80.4777, |
|
"eval_squad_v2_samples_per_second": 323.866, |
|
"eval_squad_v2_steps_per_second": 20.242, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_adversarial_qa_accuracy": 0.7928618283737849, |
|
"eval_adversarial_qa_loss": 1.1015625, |
|
"eval_adversarial_qa_runtime": 18.1784, |
|
"eval_adversarial_qa_samples_per_second": 330.062, |
|
"eval_adversarial_qa_steps_per_second": 20.629, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_private_tuning_accuracy": 0.6519236031057545, |
|
"eval_private_tuning_loss": 1.2939453125, |
|
"eval_private_tuning_runtime": 67.7066, |
|
"eval_private_tuning_samples_per_second": 312.791, |
|
"eval_private_tuning_steps_per_second": 19.555, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_oa_translated_accuracy": 0.6739938039772579, |
|
"eval_oa_translated_loss": 1.4169921875, |
|
"eval_oa_translated_runtime": 489.9008, |
|
"eval_oa_translated_samples_per_second": 272.961, |
|
"eval_oa_translated_steps_per_second": 17.061, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_prosocial_dialogue_accuracy": 0.5339765946198812, |
|
"eval_prosocial_dialogue_loss": 1.8515625, |
|
"eval_prosocial_dialogue_runtime": 112.602, |
|
"eval_prosocial_dialogue_samples_per_second": 239.632, |
|
"eval_prosocial_dialogue_steps_per_second": 14.982, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_math_qa_accuracy": 0.5540153422185813, |
|
"eval_math_qa_loss": 1.9658203125, |
|
"eval_math_qa_runtime": 19.4551, |
|
"eval_math_qa_samples_per_second": 306.758, |
|
"eval_math_qa_steps_per_second": 19.172, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_wikihow_accuracy": 0.5962636905587134, |
|
"eval_wikihow_loss": 2.03515625, |
|
"eval_wikihow_runtime": 8.6232, |
|
"eval_wikihow_samples_per_second": 265.91, |
|
"eval_wikihow_steps_per_second": 16.699, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_joke_accuracy": 0.4670204700530705, |
|
"eval_joke_loss": 2.40234375, |
|
"eval_joke_runtime": 0.908, |
|
"eval_joke_samples_per_second": 83.698, |
|
"eval_joke_steps_per_second": 5.506, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_gsm8k_accuracy": 0.7402429297099117, |
|
"eval_gsm8k_loss": 0.97998046875, |
|
"eval_gsm8k_runtime": 5.491, |
|
"eval_gsm8k_samples_per_second": 272.266, |
|
"eval_gsm8k_steps_per_second": 17.119, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ted_trans_en-hi_accuracy": 0.5490196078431373, |
|
"eval_ted_trans_en-hi_loss": 2.10546875, |
|
"eval_ted_trans_en-hi_runtime": 1.4695, |
|
"eval_ted_trans_en-hi_samples_per_second": 70.093, |
|
"eval_ted_trans_en-hi_steps_per_second": 4.764, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ted_trans_de-ja_accuracy": 0.5422406826169489, |
|
"eval_ted_trans_de-ja_loss": 2.119140625, |
|
"eval_ted_trans_de-ja_runtime": 2.8137, |
|
"eval_ted_trans_de-ja_samples_per_second": 255.176, |
|
"eval_ted_trans_de-ja_steps_per_second": 15.993, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ted_trans_nl-en_accuracy": 0.6510702489011417, |
|
"eval_ted_trans_nl-en_loss": 1.6396484375, |
|
"eval_ted_trans_nl-en_runtime": 4.3357, |
|
"eval_ted_trans_nl-en_samples_per_second": 177.827, |
|
"eval_ted_trans_nl-en_steps_per_second": 11.302, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ted_trans_en-ja_accuracy": 0.556157479064968, |
|
"eval_ted_trans_en-ja_loss": 2.017578125, |
|
"eval_ted_trans_en-ja_runtime": 3.2862, |
|
"eval_ted_trans_en-ja_samples_per_second": 243.744, |
|
"eval_ted_trans_en-ja_steps_per_second": 15.519, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ted_trans_en-es_accuracy": 0.7188412420341738, |
|
"eval_ted_trans_en-es_loss": 1.2294921875, |
|
"eval_ted_trans_en-es_runtime": 4.2374, |
|
"eval_ted_trans_en-es_samples_per_second": 194.932, |
|
"eval_ted_trans_en-es_steps_per_second": 12.272, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ted_trans_en-ms_accuracy": 0.5734784760019792, |
|
"eval_ted_trans_en-ms_loss": 2.06640625, |
|
"eval_ted_trans_en-ms_runtime": 0.629, |
|
"eval_ted_trans_en-ms_samples_per_second": 66.768, |
|
"eval_ted_trans_en-ms_steps_per_second": 4.769, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_xsum_accuracy": 0.5781070399698887, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 144.895, |
|
"eval_xsum_samples_per_second": 281.645, |
|
"eval_xsum_steps_per_second": 17.606, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_cnn_dailymail_accuracy": 0.661322804206144, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 208.0253, |
|
"eval_cnn_dailymail_samples_per_second": 276.039, |
|
"eval_cnn_dailymail_steps_per_second": 17.253, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_multi_news_accuracy": 0.5257933653652501, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 34.8299, |
|
"eval_multi_news_samples_per_second": 258.255, |
|
"eval_multi_news_steps_per_second": 16.164, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_tldr_news_accuracy": 0.554453117652591, |
|
"eval_tldr_news_loss": 2.056640625, |
|
"eval_tldr_news_runtime": 4.219, |
|
"eval_tldr_news_samples_per_second": 338.467, |
|
"eval_tldr_news_steps_per_second": 21.332, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_scitldr_accuracy": 0.5014970059880239, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.5157, |
|
"eval_scitldr_samples_per_second": 158.605, |
|
"eval_scitldr_steps_per_second": 9.938, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_samsum_accuracy": 0.5984307075274895, |
|
"eval_samsum_loss": 1.5048828125, |
|
"eval_samsum_runtime": 9.7229, |
|
"eval_samsum_samples_per_second": 303.099, |
|
"eval_samsum_steps_per_second": 19.027, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_debate_sum_accuracy": 0.9366294751454436, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 191.1458, |
|
"eval_debate_sum_samples_per_second": 251.714, |
|
"eval_debate_sum_steps_per_second": 15.737, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_billsum_accuracy": 0.6557828398195577, |
|
"eval_billsum_loss": NaN, |
|
"eval_billsum_runtime": 21.6214, |
|
"eval_billsum_samples_per_second": 175.289, |
|
"eval_billsum_steps_per_second": 10.961, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_wmt2019_zh-en_accuracy": 0.5675863966606366, |
|
"eval_wmt2019_zh-en_loss": 2.0625, |
|
"eval_wmt2019_zh-en_runtime": 12.8505, |
|
"eval_wmt2019_zh-en_samples_per_second": 309.793, |
|
"eval_wmt2019_zh-en_steps_per_second": 19.377, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_wmt2019_ru-en_accuracy": 0.6426618715553412, |
|
"eval_wmt2019_ru-en_loss": 1.517578125, |
|
"eval_wmt2019_ru-en_runtime": 9.7092, |
|
"eval_wmt2019_ru-en_samples_per_second": 308.986, |
|
"eval_wmt2019_ru-en_steps_per_second": 19.363, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_wmt2019_de-en_accuracy": 0.6722804744747176, |
|
"eval_wmt2019_de-en_loss": 1.4169921875, |
|
"eval_wmt2019_de-en_runtime": 10.2232, |
|
"eval_wmt2019_de-en_samples_per_second": 293.255, |
|
"eval_wmt2019_de-en_steps_per_second": 18.39, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_wmt2019_fr-de_accuracy": 0.6611832925051939, |
|
"eval_wmt2019_fr-de_loss": 1.466796875, |
|
"eval_wmt2019_fr-de_runtime": 5.2327, |
|
"eval_wmt2019_fr-de_samples_per_second": 288.95, |
|
"eval_wmt2019_fr-de_steps_per_second": 18.155, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_essay_instruction_accuracy": 0.5831323516967082, |
|
"eval_essay_instruction_loss": 2.0546875, |
|
"eval_essay_instruction_runtime": 4.364, |
|
"eval_essay_instruction_samples_per_second": 94.638, |
|
"eval_essay_instruction_steps_per_second": 5.958, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_reddit_eli5_accuracy": 0.4256390849199412, |
|
"eval_reddit_eli5_loss": 2.732421875, |
|
"eval_reddit_eli5_runtime": 199.1709, |
|
"eval_reddit_eli5_samples_per_second": 273.77, |
|
"eval_reddit_eli5_steps_per_second": 17.111, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_reddit_askh_accuracy": 0.428839058527484, |
|
"eval_reddit_askh_loss": 2.80859375, |
|
"eval_reddit_askh_runtime": 129.3643, |
|
"eval_reddit_askh_samples_per_second": 152.322, |
|
"eval_reddit_askh_steps_per_second": 9.523, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_reddit_asks_accuracy": 0.43882877148313176, |
|
"eval_reddit_asks_loss": 2.662109375, |
|
"eval_reddit_asks_runtime": 99.4788, |
|
"eval_reddit_asks_samples_per_second": 264.941, |
|
"eval_reddit_asks_steps_per_second": 16.566, |
|
"step": 2000 |
|
} |
|
], |
|
"max_steps": 12168, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.6560121131357438e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|