|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.029488434635935788, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4084967333570947e-06, |
|
"loss": 2.0409, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.0507482022971233e-06, |
|
"loss": 1.8182, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.385606273598312e-06, |
|
"loss": 1.6715, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.6136695401116585e-06, |
|
"loss": 1.7864, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.7868297632261957e-06, |
|
"loss": 1.6323, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.926458092787486e-06, |
|
"loss": 1.6948, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.0434580045013773e-06, |
|
"loss": 1.6492, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.1441512086208035e-06, |
|
"loss": 1.6454, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.232532087697698e-06, |
|
"loss": 1.6685, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.3112862237770753e-06, |
|
"loss": 1.5988, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.3823062961420163e-06, |
|
"loss": 1.5397, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.446976436243603e-06, |
|
"loss": 1.6389, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.506339534926595e-06, |
|
"loss": 1.5864, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.5612009452606784e-06, |
|
"loss": 1.6896, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.612195557913627e-06, |
|
"loss": 1.6217, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.65983275401539e-06, |
|
"loss": 1.6265, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.7045274519126395e-06, |
|
"loss": 1.6128, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.7466221106030114e-06, |
|
"loss": 1.5282, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.786402677560832e-06, |
|
"loss": 1.6623, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.824110376935989e-06, |
|
"loss": 1.5958, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.8599505757615295e-06, |
|
"loss": 1.6162, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.894099556414216e-06, |
|
"loss": 1.5936, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.9267097619885385e-06, |
|
"loss": 1.5658, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.95791391001684e-06, |
|
"loss": 1.627, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.987828255432777e-06, |
|
"loss": 1.6155, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_gsm8k_hard_accuracy": 0.8835755873132202, |
|
"eval_gsm8k_hard_loss": 0.51513671875, |
|
"eval_gsm8k_hard_runtime": 6.8667, |
|
"eval_gsm8k_hard_samples_per_second": 38.446, |
|
"eval_gsm8k_hard_steps_per_second": 0.437, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_webgpt_accuracy": 0.49985688262224126, |
|
"eval_webgpt_loss": 2.197265625, |
|
"eval_webgpt_runtime": 38.8229, |
|
"eval_webgpt_samples_per_second": 100.868, |
|
"eval_webgpt_steps_per_second": 1.056, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_squad_v2_accuracy": 0.8732225651432517, |
|
"eval_squad_v2_loss": 0.394775390625, |
|
"eval_squad_v2_runtime": 212.1787, |
|
"eval_squad_v2_samples_per_second": 122.84, |
|
"eval_squad_v2_steps_per_second": 1.282, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_adversarial_qa_accuracy": 0.7885436255634161, |
|
"eval_adversarial_qa_loss": 0.84423828125, |
|
"eval_adversarial_qa_runtime": 53.4145, |
|
"eval_adversarial_qa_samples_per_second": 112.329, |
|
"eval_adversarial_qa_steps_per_second": 1.179, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_private_tuning_accuracy": 0.6697048468296535, |
|
"eval_private_tuning_loss": 1.234375, |
|
"eval_private_tuning_runtime": 147.3821, |
|
"eval_private_tuning_samples_per_second": 143.695, |
|
"eval_private_tuning_steps_per_second": 1.5, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_oa_translated_accuracy": 0.6888436472902636, |
|
"eval_oa_translated_loss": 1.271484375, |
|
"eval_oa_translated_runtime": 1288.641, |
|
"eval_oa_translated_samples_per_second": 91.051, |
|
"eval_oa_translated_steps_per_second": 0.949, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_prosocial_dialogue_accuracy": 0.5277240036359349, |
|
"eval_prosocial_dialogue_loss": 1.830078125, |
|
"eval_prosocial_dialogue_runtime": 61.2751, |
|
"eval_prosocial_dialogue_samples_per_second": 440.358, |
|
"eval_prosocial_dialogue_steps_per_second": 4.602, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_math_qa_accuracy": 0.5650762200656649, |
|
"eval_math_qa_loss": 1.912109375, |
|
"eval_math_qa_runtime": 43.4013, |
|
"eval_math_qa_samples_per_second": 137.507, |
|
"eval_math_qa_steps_per_second": 1.452, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wikihow_accuracy": 0.6096311191235613, |
|
"eval_wikihow_loss": 1.8701171875, |
|
"eval_wikihow_runtime": 16.5775, |
|
"eval_wikihow_samples_per_second": 138.32, |
|
"eval_wikihow_steps_per_second": 1.448, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_joke_accuracy": 0.49194465504169826, |
|
"eval_joke_loss": 2.216796875, |
|
"eval_joke_runtime": 2.4324, |
|
"eval_joke_samples_per_second": 31.245, |
|
"eval_joke_steps_per_second": 0.411, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_gsm8k_accuracy": 0.7494717398301172, |
|
"eval_gsm8k_loss": 0.9765625, |
|
"eval_gsm8k_runtime": 10.7538, |
|
"eval_gsm8k_samples_per_second": 139.021, |
|
"eval_gsm8k_steps_per_second": 1.488, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-hi_accuracy": 0.6902974158946855, |
|
"eval_ted_trans_en-hi_loss": 1.1455078125, |
|
"eval_ted_trans_en-hi_runtime": 4.0656, |
|
"eval_ted_trans_en-hi_samples_per_second": 25.335, |
|
"eval_ted_trans_en-hi_steps_per_second": 0.492, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_de-ja_accuracy": 0.6504194889162561, |
|
"eval_ted_trans_de-ja_loss": 1.52734375, |
|
"eval_ted_trans_de-ja_runtime": 8.8337, |
|
"eval_ted_trans_de-ja_samples_per_second": 81.279, |
|
"eval_ted_trans_de-ja_steps_per_second": 0.906, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_nl-en_accuracy": 0.7506186489759386, |
|
"eval_ted_trans_nl-en_loss": 1.091796875, |
|
"eval_ted_trans_nl-en_runtime": 8.871, |
|
"eval_ted_trans_nl-en_samples_per_second": 86.913, |
|
"eval_ted_trans_nl-en_steps_per_second": 1.015, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-ja_accuracy": 0.6557647009776333, |
|
"eval_ted_trans_en-ja_loss": 1.427734375, |
|
"eval_ted_trans_en-ja_runtime": 9.6821, |
|
"eval_ted_trans_en-ja_samples_per_second": 82.73, |
|
"eval_ted_trans_en-ja_steps_per_second": 0.93, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-es_accuracy": 0.7831022379328372, |
|
"eval_ted_trans_en-es_loss": 0.89599609375, |
|
"eval_ted_trans_en-es_runtime": 7.8367, |
|
"eval_ted_trans_en-es_samples_per_second": 105.401, |
|
"eval_ted_trans_en-es_steps_per_second": 1.148, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-ms_accuracy": 0.689470871191876, |
|
"eval_ted_trans_en-ms_loss": 1.4052734375, |
|
"eval_ted_trans_en-ms_runtime": 1.3714, |
|
"eval_ted_trans_en-ms_samples_per_second": 30.625, |
|
"eval_ted_trans_en-ms_steps_per_second": 0.729, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_xsum_accuracy": 0.6193833980292625, |
|
"eval_xsum_loss": 1.4599609375, |
|
"eval_xsum_runtime": 434.5368, |
|
"eval_xsum_samples_per_second": 93.914, |
|
"eval_xsum_steps_per_second": 0.98, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_cnn_dailymail_accuracy": 0.6712703040399833, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 624.2796, |
|
"eval_cnn_dailymail_samples_per_second": 91.983, |
|
"eval_cnn_dailymail_steps_per_second": 0.96, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_multi_news_accuracy": 0.5461545789406833, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 102.6315, |
|
"eval_multi_news_samples_per_second": 87.644, |
|
"eval_multi_news_steps_per_second": 0.916, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_tldr_news_accuracy": 0.5329163923633969, |
|
"eval_tldr_news_loss": 2.1640625, |
|
"eval_tldr_news_runtime": 7.304, |
|
"eval_tldr_news_samples_per_second": 195.509, |
|
"eval_tldr_news_steps_per_second": 2.054, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_scitldr_accuracy": 0.5056726094003241, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 6.0172, |
|
"eval_scitldr_samples_per_second": 66.309, |
|
"eval_scitldr_steps_per_second": 0.831, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_samsum_accuracy": 0.6255323175925049, |
|
"eval_samsum_loss": 1.390625, |
|
"eval_samsum_runtime": 31.2731, |
|
"eval_samsum_samples_per_second": 94.234, |
|
"eval_samsum_steps_per_second": 0.991, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_debate_sum_accuracy": 0.934249098160658, |
|
"eval_debate_sum_loss": 0.363525390625, |
|
"eval_debate_sum_runtime": 539.9242, |
|
"eval_debate_sum_samples_per_second": 89.113, |
|
"eval_debate_sum_steps_per_second": 0.93, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_billsum_accuracy": 0.6761362636279469, |
|
"eval_billsum_loss": 1.3681640625, |
|
"eval_billsum_runtime": 47.9835, |
|
"eval_billsum_samples_per_second": 78.985, |
|
"eval_billsum_steps_per_second": 0.834, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_zh-en_accuracy": 0.6633805205208717, |
|
"eval_wmt2019_zh-en_loss": 1.474609375, |
|
"eval_wmt2019_zh-en_runtime": 27.1758, |
|
"eval_wmt2019_zh-en_samples_per_second": 146.491, |
|
"eval_wmt2019_zh-en_steps_per_second": 1.545, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_ru-en_accuracy": 0.7568385011868931, |
|
"eval_wmt2019_ru-en_loss": 0.9365234375, |
|
"eval_wmt2019_ru-en_runtime": 21.7646, |
|
"eval_wmt2019_ru-en_samples_per_second": 137.839, |
|
"eval_wmt2019_ru-en_steps_per_second": 1.47, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_de-en_accuracy": 0.7579152898768399, |
|
"eval_wmt2019_de-en_loss": 0.94921875, |
|
"eval_wmt2019_de-en_runtime": 15.095, |
|
"eval_wmt2019_de-en_samples_per_second": 198.609, |
|
"eval_wmt2019_de-en_steps_per_second": 2.12, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_fr-de_accuracy": 0.7458755561047948, |
|
"eval_wmt2019_fr-de_loss": 1.0107421875, |
|
"eval_wmt2019_fr-de_runtime": 10.8089, |
|
"eval_wmt2019_fr-de_samples_per_second": 139.885, |
|
"eval_wmt2019_fr-de_steps_per_second": 1.48, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_essay_instruction_accuracy": 0.5980087566061517, |
|
"eval_essay_instruction_loss": 1.939453125, |
|
"eval_essay_instruction_runtime": 8.5102, |
|
"eval_essay_instruction_samples_per_second": 48.53, |
|
"eval_essay_instruction_steps_per_second": 0.588, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_eli5_accuracy": 0.4587571011238715, |
|
"eval_reddit_eli5_loss": 2.43359375, |
|
"eval_reddit_eli5_runtime": 592.806, |
|
"eval_reddit_eli5_samples_per_second": 91.981, |
|
"eval_reddit_eli5_steps_per_second": 0.958, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_askh_accuracy": 0.46236593037589085, |
|
"eval_reddit_askh_loss": 2.53125, |
|
"eval_reddit_askh_runtime": 245.8916, |
|
"eval_reddit_askh_samples_per_second": 80.137, |
|
"eval_reddit_askh_steps_per_second": 0.838, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_asks_accuracy": 0.4693832359074744, |
|
"eval_reddit_asks_loss": 2.390625, |
|
"eval_reddit_asks_runtime": 307.0019, |
|
"eval_reddit_asks_samples_per_second": 85.85, |
|
"eval_reddit_asks_steps_per_second": 0.896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.016555205552159e-06, |
|
"loss": 1.6024, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.044185435607626e-06, |
|
"loss": 1.6344, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.070799615107415e-06, |
|
"loss": 1.5251, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.096469827889988e-06, |
|
"loss": 1.5818, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.121260748862021e-06, |
|
"loss": 1.6331, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.145230625795312e-06, |
|
"loss": 1.6272, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.1684321036962525e-06, |
|
"loss": 1.5872, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.190912921100477e-06, |
|
"loss": 1.6504, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.212716501452232e-06, |
|
"loss": 1.566, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.233882457984791e-06, |
|
"loss": 1.6106, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.2544470268536555e-06, |
|
"loss": 1.616, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.27444344042015e-06, |
|
"loss": 1.6374, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.293902250342989e-06, |
|
"loss": 1.5941, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.312851608364853e-06, |
|
"loss": 1.6115, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.3313175112718595e-06, |
|
"loss": 1.5531, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.3493240153753665e-06, |
|
"loss": 1.5554, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.366893424956263e-06, |
|
"loss": 1.5233, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.38404645837504e-06, |
|
"loss": 1.5579, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.400802394950703e-06, |
|
"loss": 1.6028, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.4171792052198945e-06, |
|
"loss": 1.5239, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.433193666783084e-06, |
|
"loss": 1.6149, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.448861467610187e-06, |
|
"loss": 1.6114, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.4641972984001906e-06, |
|
"loss": 1.6682, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.479214935357724e-06, |
|
"loss": 1.5707, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.493927314555554e-06, |
|
"loss": 1.5827, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_gsm8k_hard_accuracy": 0.9183054435894304, |
|
"eval_gsm8k_hard_loss": 0.3740234375, |
|
"eval_gsm8k_hard_runtime": 3.8504, |
|
"eval_gsm8k_hard_samples_per_second": 68.565, |
|
"eval_gsm8k_hard_steps_per_second": 0.779, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_webgpt_accuracy": 0.502614994399395, |
|
"eval_webgpt_loss": 2.185546875, |
|
"eval_webgpt_runtime": 37.4417, |
|
"eval_webgpt_samples_per_second": 104.589, |
|
"eval_webgpt_steps_per_second": 1.095, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_squad_v2_accuracy": 0.8982722417170479, |
|
"eval_squad_v2_loss": 0.33447265625, |
|
"eval_squad_v2_runtime": 214.9352, |
|
"eval_squad_v2_samples_per_second": 121.264, |
|
"eval_squad_v2_steps_per_second": 1.265, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_adversarial_qa_accuracy": 0.8046625473866452, |
|
"eval_adversarial_qa_loss": 0.8486328125, |
|
"eval_adversarial_qa_runtime": 51.9881, |
|
"eval_adversarial_qa_samples_per_second": 115.411, |
|
"eval_adversarial_qa_steps_per_second": 1.212, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_private_tuning_accuracy": 0.6754279825666092, |
|
"eval_private_tuning_loss": 1.20703125, |
|
"eval_private_tuning_runtime": 143.688, |
|
"eval_private_tuning_samples_per_second": 147.389, |
|
"eval_private_tuning_steps_per_second": 1.538, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_oa_translated_accuracy": 0.6956755454438557, |
|
"eval_oa_translated_loss": 1.2421875, |
|
"eval_oa_translated_runtime": 1298.0566, |
|
"eval_oa_translated_samples_per_second": 90.391, |
|
"eval_oa_translated_steps_per_second": 0.942, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_prosocial_dialogue_accuracy": 0.5306487253309804, |
|
"eval_prosocial_dialogue_loss": 1.783203125, |
|
"eval_prosocial_dialogue_runtime": 62.6995, |
|
"eval_prosocial_dialogue_samples_per_second": 430.355, |
|
"eval_prosocial_dialogue_steps_per_second": 4.498, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_math_qa_accuracy": 0.573035368807606, |
|
"eval_math_qa_loss": 1.849609375, |
|
"eval_math_qa_runtime": 42.0578, |
|
"eval_math_qa_samples_per_second": 141.9, |
|
"eval_math_qa_steps_per_second": 1.498, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wikihow_accuracy": 0.6166412425461101, |
|
"eval_wikihow_loss": 1.8369140625, |
|
"eval_wikihow_runtime": 17.5874, |
|
"eval_wikihow_samples_per_second": 130.377, |
|
"eval_wikihow_steps_per_second": 1.365, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_joke_accuracy": 0.500284306292646, |
|
"eval_joke_loss": 2.1875, |
|
"eval_joke_runtime": 1.5291, |
|
"eval_joke_samples_per_second": 49.704, |
|
"eval_joke_steps_per_second": 0.654, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_gsm8k_accuracy": 0.7605687018093785, |
|
"eval_gsm8k_loss": 0.91357421875, |
|
"eval_gsm8k_runtime": 11.3759, |
|
"eval_gsm8k_samples_per_second": 131.418, |
|
"eval_gsm8k_steps_per_second": 1.406, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-hi_accuracy": 0.6839278864595321, |
|
"eval_ted_trans_en-hi_loss": 1.142578125, |
|
"eval_ted_trans_en-hi_runtime": 2.7736, |
|
"eval_ted_trans_en-hi_samples_per_second": 37.135, |
|
"eval_ted_trans_en-hi_steps_per_second": 0.721, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_de-ja_accuracy": 0.6501228312605558, |
|
"eval_ted_trans_de-ja_loss": 1.5048828125, |
|
"eval_ted_trans_de-ja_runtime": 8.2515, |
|
"eval_ted_trans_de-ja_samples_per_second": 87.014, |
|
"eval_ted_trans_de-ja_steps_per_second": 0.97, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_nl-en_accuracy": 0.7532021898001414, |
|
"eval_ted_trans_nl-en_loss": 1.0654296875, |
|
"eval_ted_trans_nl-en_runtime": 7.9186, |
|
"eval_ted_trans_nl-en_samples_per_second": 97.365, |
|
"eval_ted_trans_nl-en_steps_per_second": 1.137, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-ja_accuracy": 0.6662950575994054, |
|
"eval_ted_trans_en-ja_loss": 1.3916015625, |
|
"eval_ted_trans_en-ja_runtime": 9.7107, |
|
"eval_ted_trans_en-ja_samples_per_second": 82.486, |
|
"eval_ted_trans_en-ja_steps_per_second": 0.927, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-es_accuracy": 0.7895431674388482, |
|
"eval_ted_trans_en-es_loss": 0.87646484375, |
|
"eval_ted_trans_en-es_runtime": 9.3046, |
|
"eval_ted_trans_en-es_samples_per_second": 88.774, |
|
"eval_ted_trans_en-es_steps_per_second": 0.967, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-ms_accuracy": 0.692143238909674, |
|
"eval_ted_trans_en-ms_loss": 1.36328125, |
|
"eval_ted_trans_en-ms_runtime": 1.0241, |
|
"eval_ted_trans_en-ms_samples_per_second": 41.011, |
|
"eval_ted_trans_en-ms_steps_per_second": 0.976, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_xsum_accuracy": 0.621155930594, |
|
"eval_xsum_loss": 1.4501953125, |
|
"eval_xsum_runtime": 440.7691, |
|
"eval_xsum_samples_per_second": 92.586, |
|
"eval_xsum_steps_per_second": 0.966, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_cnn_dailymail_accuracy": 0.6818918043407839, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 631.5892, |
|
"eval_cnn_dailymail_samples_per_second": 90.918, |
|
"eval_cnn_dailymail_steps_per_second": 0.948, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_multi_news_accuracy": 0.5512987425377873, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 102.5343, |
|
"eval_multi_news_samples_per_second": 87.727, |
|
"eval_multi_news_steps_per_second": 0.917, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_tldr_news_accuracy": 0.5479638860152356, |
|
"eval_tldr_news_loss": 2.09375, |
|
"eval_tldr_news_runtime": 7.6366, |
|
"eval_tldr_news_samples_per_second": 186.994, |
|
"eval_tldr_news_steps_per_second": 1.964, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_scitldr_accuracy": 0.4991896272285251, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 5.9643, |
|
"eval_scitldr_samples_per_second": 66.899, |
|
"eval_scitldr_steps_per_second": 0.838, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_samsum_accuracy": 0.6392542821992997, |
|
"eval_samsum_loss": 1.3603515625, |
|
"eval_samsum_runtime": 31.1036, |
|
"eval_samsum_samples_per_second": 94.748, |
|
"eval_samsum_steps_per_second": 0.997, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_debate_sum_accuracy": 0.9375640253883767, |
|
"eval_debate_sum_loss": 0.34521484375, |
|
"eval_debate_sum_runtime": 548.5555, |
|
"eval_debate_sum_samples_per_second": 87.71, |
|
"eval_debate_sum_steps_per_second": 0.915, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_billsum_accuracy": 0.6806867345609693, |
|
"eval_billsum_loss": 1.3427734375, |
|
"eval_billsum_runtime": 43.496, |
|
"eval_billsum_samples_per_second": 87.134, |
|
"eval_billsum_steps_per_second": 0.92, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_zh-en_accuracy": 0.6670238429829493, |
|
"eval_wmt2019_zh-en_loss": 1.453125, |
|
"eval_wmt2019_zh-en_runtime": 28.9371, |
|
"eval_wmt2019_zh-en_samples_per_second": 137.574, |
|
"eval_wmt2019_zh-en_steps_per_second": 1.451, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_ru-en_accuracy": 0.7587101830765136, |
|
"eval_wmt2019_ru-en_loss": 0.92724609375, |
|
"eval_wmt2019_ru-en_runtime": 23.7381, |
|
"eval_wmt2019_ru-en_samples_per_second": 126.379, |
|
"eval_wmt2019_ru-en_steps_per_second": 1.348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_de-en_accuracy": 0.7675478121558026, |
|
"eval_wmt2019_de-en_loss": 0.90478515625, |
|
"eval_wmt2019_de-en_runtime": 16.2264, |
|
"eval_wmt2019_de-en_samples_per_second": 184.76, |
|
"eval_wmt2019_de-en_steps_per_second": 1.972, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_fr-de_accuracy": 0.7500888456249324, |
|
"eval_wmt2019_fr-de_loss": 0.99560546875, |
|
"eval_wmt2019_fr-de_runtime": 11.5712, |
|
"eval_wmt2019_fr-de_samples_per_second": 130.669, |
|
"eval_wmt2019_fr-de_steps_per_second": 1.383, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_essay_instruction_accuracy": 0.6002366052672313, |
|
"eval_essay_instruction_loss": 1.9189453125, |
|
"eval_essay_instruction_runtime": 8.0794, |
|
"eval_essay_instruction_samples_per_second": 51.118, |
|
"eval_essay_instruction_steps_per_second": 0.619, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_eli5_accuracy": 0.46082089893518746, |
|
"eval_reddit_eli5_loss": 2.4296875, |
|
"eval_reddit_eli5_runtime": 602.6271, |
|
"eval_reddit_eli5_samples_per_second": 90.482, |
|
"eval_reddit_eli5_steps_per_second": 0.943, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_askh_accuracy": 0.46347532552175574, |
|
"eval_reddit_askh_loss": 2.52734375, |
|
"eval_reddit_askh_runtime": 245.7671, |
|
"eval_reddit_askh_samples_per_second": 80.178, |
|
"eval_reddit_askh_steps_per_second": 0.838, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_asks_accuracy": 0.47150193020881753, |
|
"eval_reddit_asks_loss": 2.38671875, |
|
"eval_reddit_asks_runtime": 320.7509, |
|
"eval_reddit_asks_samples_per_second": 82.17, |
|
"eval_reddit_asks_steps_per_second": 0.857, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.5083465988888945e-06, |
|
"loss": 1.5195, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.5224842384899045e-06, |
|
"loss": 1.492, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.5363510253542444e-06, |
|
"loss": 1.5302, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.549957142832593e-06, |
|
"loss": 1.5267, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.563312210555719e-06, |
|
"loss": 1.565, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.576425325289549e-06, |
|
"loss": 1.6208, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.589305098154845e-06, |
|
"loss": 1.6341, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.601959688592886e-06, |
|
"loss": 1.5639, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.614396835412691e-06, |
|
"loss": 1.6218, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.626623885215616e-06, |
|
"loss": 1.5995, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.638647818458763e-06, |
|
"loss": 1.6176, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.650475273388737e-06, |
|
"loss": 1.5944, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.662112568051194e-06, |
|
"loss": 1.6074, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.673565720558918e-06, |
|
"loss": 1.5783, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6848404677811685e-06, |
|
"loss": 1.5135, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.695942282599635e-06, |
|
"loss": 1.6396, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.706876389860915e-06, |
|
"loss": 1.6053, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.717647781141908e-06, |
|
"loss": 1.5982, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.7282612284325845e-06, |
|
"loss": 1.5361, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.738721296830016e-06, |
|
"loss": 1.5127, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.749032356328167e-06, |
|
"loss": 1.4852, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.759198592779668e-06, |
|
"loss": 1.5432, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.769224018098397e-06, |
|
"loss": 1.5425, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.7791124797650865e-06, |
|
"loss": 1.493, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.788867669692332e-06, |
|
"loss": 1.5065, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_gsm8k_hard_accuracy": 0.9174097145881682, |
|
"eval_gsm8k_hard_loss": 0.366455078125, |
|
"eval_gsm8k_hard_runtime": 6.7984, |
|
"eval_gsm8k_hard_samples_per_second": 38.833, |
|
"eval_gsm8k_hard_steps_per_second": 0.441, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_webgpt_accuracy": 0.5023221414992414, |
|
"eval_webgpt_loss": 2.181640625, |
|
"eval_webgpt_runtime": 39.4537, |
|
"eval_webgpt_samples_per_second": 99.256, |
|
"eval_webgpt_steps_per_second": 1.039, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_squad_v2_accuracy": 0.8977014895925817, |
|
"eval_squad_v2_loss": 0.331787109375, |
|
"eval_squad_v2_runtime": 214.9281, |
|
"eval_squad_v2_samples_per_second": 121.268, |
|
"eval_squad_v2_steps_per_second": 1.266, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_adversarial_qa_accuracy": 0.8063639891346527, |
|
"eval_adversarial_qa_loss": 0.8232421875, |
|
"eval_adversarial_qa_runtime": 51.9182, |
|
"eval_adversarial_qa_samples_per_second": 115.567, |
|
"eval_adversarial_qa_steps_per_second": 1.213, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_private_tuning_accuracy": 0.6775308778615678, |
|
"eval_private_tuning_loss": 1.1962890625, |
|
"eval_private_tuning_runtime": 149.5702, |
|
"eval_private_tuning_samples_per_second": 141.592, |
|
"eval_private_tuning_steps_per_second": 1.478, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_oa_translated_accuracy": 0.6986534506008611, |
|
"eval_oa_translated_loss": 1.22265625, |
|
"eval_oa_translated_runtime": 1324.5514, |
|
"eval_oa_translated_samples_per_second": 89.655, |
|
"eval_oa_translated_steps_per_second": 0.935, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_prosocial_dialogue_accuracy": 0.5327101026505052, |
|
"eval_prosocial_dialogue_loss": 1.7802734375, |
|
"eval_prosocial_dialogue_runtime": 70.7166, |
|
"eval_prosocial_dialogue_samples_per_second": 381.565, |
|
"eval_prosocial_dialogue_steps_per_second": 3.988, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_math_qa_accuracy": 0.5798378605476227, |
|
"eval_math_qa_loss": 1.826171875, |
|
"eval_math_qa_runtime": 44.6748, |
|
"eval_math_qa_samples_per_second": 133.588, |
|
"eval_math_qa_steps_per_second": 1.41, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wikihow_accuracy": 0.6193731798640966, |
|
"eval_wikihow_loss": 1.802734375, |
|
"eval_wikihow_runtime": 16.8626, |
|
"eval_wikihow_samples_per_second": 135.981, |
|
"eval_wikihow_steps_per_second": 1.423, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_joke_accuracy": 0.5020849128127369, |
|
"eval_joke_loss": 2.1640625, |
|
"eval_joke_runtime": 1.3597, |
|
"eval_joke_samples_per_second": 55.896, |
|
"eval_joke_steps_per_second": 0.735, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_gsm8k_accuracy": 0.760008955934006, |
|
"eval_gsm8k_loss": 0.9189453125, |
|
"eval_gsm8k_runtime": 12.0443, |
|
"eval_gsm8k_samples_per_second": 124.126, |
|
"eval_gsm8k_steps_per_second": 1.328, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_en-hi_accuracy": 0.6714796661809511, |
|
"eval_ted_trans_en-hi_loss": 1.2548828125, |
|
"eval_ted_trans_en-hi_runtime": 2.3695, |
|
"eval_ted_trans_en-hi_samples_per_second": 43.47, |
|
"eval_ted_trans_en-hi_steps_per_second": 0.844, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_de-ja_accuracy": 0.6580367185861629, |
|
"eval_ted_trans_de-ja_loss": 1.466796875, |
|
"eval_ted_trans_de-ja_runtime": 9.4824, |
|
"eval_ted_trans_de-ja_samples_per_second": 75.72, |
|
"eval_ted_trans_de-ja_steps_per_second": 0.844, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_nl-en_accuracy": 0.749224515991015, |
|
"eval_ted_trans_nl-en_loss": 1.080078125, |
|
"eval_ted_trans_nl-en_runtime": 8.4451, |
|
"eval_ted_trans_nl-en_samples_per_second": 91.296, |
|
"eval_ted_trans_nl-en_steps_per_second": 1.066, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_en-ja_accuracy": 0.6621738060068931, |
|
"eval_ted_trans_en-ja_loss": 1.384765625, |
|
"eval_ted_trans_en-ja_runtime": 10.0893, |
|
"eval_ted_trans_en-ja_samples_per_second": 79.391, |
|
"eval_ted_trans_en-ja_steps_per_second": 0.892, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_en-es_accuracy": 0.793457991028678, |
|
"eval_ted_trans_en-es_loss": 0.85205078125, |
|
"eval_ted_trans_en-es_runtime": 7.1771, |
|
"eval_ted_trans_en-es_samples_per_second": 115.088, |
|
"eval_ted_trans_en-es_steps_per_second": 1.254, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_en-ms_accuracy": 0.6782511210762332, |
|
"eval_ted_trans_en-ms_loss": 1.4208984375, |
|
"eval_ted_trans_en-ms_runtime": 2.0842, |
|
"eval_ted_trans_en-ms_samples_per_second": 20.151, |
|
"eval_ted_trans_en-ms_steps_per_second": 0.48, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_xsum_accuracy": 0.6225130561751453, |
|
"eval_xsum_loss": 1.4462890625, |
|
"eval_xsum_runtime": 439.444, |
|
"eval_xsum_samples_per_second": 92.865, |
|
"eval_xsum_steps_per_second": 0.969, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_cnn_dailymail_accuracy": 0.6778638530242029, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 633.9568, |
|
"eval_cnn_dailymail_samples_per_second": 90.579, |
|
"eval_cnn_dailymail_steps_per_second": 0.945, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_multi_news_accuracy": 0.5553791439095643, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 103.0316, |
|
"eval_multi_news_samples_per_second": 87.303, |
|
"eval_multi_news_steps_per_second": 0.912, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_tldr_news_accuracy": 0.5591084360011286, |
|
"eval_tldr_news_loss": 1.9892578125, |
|
"eval_tldr_news_runtime": 8.3367, |
|
"eval_tldr_news_samples_per_second": 171.29, |
|
"eval_tldr_news_steps_per_second": 1.799, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_scitldr_accuracy": 0.49270664505672607, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 5.8517, |
|
"eval_scitldr_samples_per_second": 68.186, |
|
"eval_scitldr_steps_per_second": 0.854, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_samsum_accuracy": 0.6411875245035082, |
|
"eval_samsum_loss": 1.32421875, |
|
"eval_samsum_runtime": 32.2591, |
|
"eval_samsum_samples_per_second": 91.354, |
|
"eval_samsum_steps_per_second": 0.961, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_debate_sum_accuracy": 0.9381249710591028, |
|
"eval_debate_sum_loss": 0.337646484375, |
|
"eval_debate_sum_runtime": 548.1225, |
|
"eval_debate_sum_samples_per_second": 87.78, |
|
"eval_debate_sum_steps_per_second": 0.916, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_billsum_accuracy": 0.6810246806233696, |
|
"eval_billsum_loss": 1.3359375, |
|
"eval_billsum_runtime": 50.0247, |
|
"eval_billsum_samples_per_second": 75.763, |
|
"eval_billsum_steps_per_second": 0.8, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wmt2019_zh-en_accuracy": 0.6683125468349724, |
|
"eval_wmt2019_zh-en_loss": 1.451171875, |
|
"eval_wmt2019_zh-en_runtime": 27.2087, |
|
"eval_wmt2019_zh-en_samples_per_second": 146.313, |
|
"eval_wmt2019_zh-en_steps_per_second": 1.544, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wmt2019_ru-en_accuracy": 0.755552089368213, |
|
"eval_wmt2019_ru-en_loss": 0.94091796875, |
|
"eval_wmt2019_ru-en_runtime": 20.7954, |
|
"eval_wmt2019_ru-en_samples_per_second": 144.262, |
|
"eval_wmt2019_ru-en_steps_per_second": 1.539, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wmt2019_de-en_accuracy": 0.7641599453590333, |
|
"eval_wmt2019_de-en_loss": 0.9228515625, |
|
"eval_wmt2019_de-en_runtime": 15.5528, |
|
"eval_wmt2019_de-en_samples_per_second": 192.762, |
|
"eval_wmt2019_de-en_steps_per_second": 2.058, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wmt2019_fr-de_accuracy": 0.7474449624849476, |
|
"eval_wmt2019_fr-de_loss": 1.00390625, |
|
"eval_wmt2019_fr-de_runtime": 11.5093, |
|
"eval_wmt2019_fr-de_samples_per_second": 131.372, |
|
"eval_wmt2019_fr-de_steps_per_second": 1.39, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_essay_instruction_accuracy": 0.6032218119098689, |
|
"eval_essay_instruction_loss": 1.904296875, |
|
"eval_essay_instruction_runtime": 7.606, |
|
"eval_essay_instruction_samples_per_second": 54.299, |
|
"eval_essay_instruction_steps_per_second": 0.657, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_reddit_eli5_accuracy": 0.4612608360817972, |
|
"eval_reddit_eli5_loss": 2.431640625, |
|
"eval_reddit_eli5_runtime": 597.1988, |
|
"eval_reddit_eli5_samples_per_second": 91.305, |
|
"eval_reddit_eli5_steps_per_second": 0.951, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_reddit_askh_accuracy": 0.46371300245115404, |
|
"eval_reddit_askh_loss": 2.525390625, |
|
"eval_reddit_askh_runtime": 253.0373, |
|
"eval_reddit_askh_samples_per_second": 77.874, |
|
"eval_reddit_askh_steps_per_second": 0.814, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_reddit_asks_accuracy": 0.47195547000535765, |
|
"eval_reddit_asks_loss": 2.388671875, |
|
"eval_reddit_asks_runtime": 304.1555, |
|
"eval_reddit_asks_samples_per_second": 86.653, |
|
"eval_reddit_asks_steps_per_second": 0.904, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.798493132500121e-06, |
|
"loss": 1.5526, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8079922732483016e-06, |
|
"loss": 1.4845, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.817368364668191e-06, |
|
"loss": 1.5351, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8266245539317745e-06, |
|
"loss": 1.5942, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.835763868993521e-06, |
|
"loss": 1.4886, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.844789224536785e-06, |
|
"loss": 1.5645, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.853703427554027e-06, |
|
"loss": 1.5099, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.862509182587578e-06, |
|
"loss": 1.619, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.871209096655434e-06, |
|
"loss": 1.542, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.879805683884512e-06, |
|
"loss": 1.5254, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.888301369871998e-06, |
|
"loss": 1.5427, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8966984957936845e-06, |
|
"loss": 1.5403, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.904999322276735e-06, |
|
"loss": 1.5848, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.913206033052878e-06, |
|
"loss": 1.5205, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.921320738406821e-06, |
|
"loss": 1.5359, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.929345478433492e-06, |
|
"loss": 1.5631, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.937282226116702e-06, |
|
"loss": 1.5928, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.945132890240829e-06, |
|
"loss": 1.4707, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.952899318146298e-06, |
|
"loss": 1.5279, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.96058329833879e-06, |
|
"loss": 1.5411, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.968186562961406e-06, |
|
"loss": 1.6029, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.975710790138337e-06, |
|
"loss": 1.648, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9831576061979556e-06, |
|
"loss": 1.5799, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.990528587782728e-06, |
|
"loss": 1.5592, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.99782526385276e-06, |
|
"loss": 1.6317, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_gsm8k_hard_accuracy": 0.9242294694841415, |
|
"eval_gsm8k_hard_loss": 0.337158203125, |
|
"eval_gsm8k_hard_runtime": 4.4214, |
|
"eval_gsm8k_hard_samples_per_second": 59.709, |
|
"eval_gsm8k_hard_steps_per_second": 0.679, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_webgpt_accuracy": 0.5016719087887306, |
|
"eval_webgpt_loss": 2.181640625, |
|
"eval_webgpt_runtime": 36.3649, |
|
"eval_webgpt_samples_per_second": 107.686, |
|
"eval_webgpt_steps_per_second": 1.127, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_squad_v2_accuracy": 0.9092151805972463, |
|
"eval_squad_v2_loss": 0.35546875, |
|
"eval_squad_v2_runtime": 216.2111, |
|
"eval_squad_v2_samples_per_second": 120.549, |
|
"eval_squad_v2_steps_per_second": 1.258, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_adversarial_qa_accuracy": 0.8333781081161756, |
|
"eval_adversarial_qa_loss": 0.83642578125, |
|
"eval_adversarial_qa_runtime": 52.2959, |
|
"eval_adversarial_qa_samples_per_second": 114.732, |
|
"eval_adversarial_qa_steps_per_second": 1.205, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_private_tuning_accuracy": 0.6788522917969135, |
|
"eval_private_tuning_loss": 1.1845703125, |
|
"eval_private_tuning_runtime": 145.6236, |
|
"eval_private_tuning_samples_per_second": 145.43, |
|
"eval_private_tuning_steps_per_second": 1.518, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_oa_translated_accuracy": 0.7015835515263078, |
|
"eval_oa_translated_loss": 1.208984375, |
|
"eval_oa_translated_runtime": 1331.5436, |
|
"eval_oa_translated_samples_per_second": 89.184, |
|
"eval_oa_translated_steps_per_second": 0.93, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_prosocial_dialogue_accuracy": 0.5440172516743936, |
|
"eval_prosocial_dialogue_loss": 1.7470703125, |
|
"eval_prosocial_dialogue_runtime": 66.8792, |
|
"eval_prosocial_dialogue_samples_per_second": 403.459, |
|
"eval_prosocial_dialogue_steps_per_second": 4.217, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_math_qa_accuracy": 0.5903696634283728, |
|
"eval_math_qa_loss": 1.7734375, |
|
"eval_math_qa_runtime": 43.3917, |
|
"eval_math_qa_samples_per_second": 137.538, |
|
"eval_math_qa_steps_per_second": 1.452, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_wikihow_accuracy": 0.6181528220773818, |
|
"eval_wikihow_loss": 1.79296875, |
|
"eval_wikihow_runtime": 16.8686, |
|
"eval_wikihow_samples_per_second": 135.933, |
|
"eval_wikihow_steps_per_second": 1.423, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_joke_accuracy": 0.5162054586808188, |
|
"eval_joke_loss": 2.095703125, |
|
"eval_joke_runtime": 1.1499, |
|
"eval_joke_samples_per_second": 66.094, |
|
"eval_joke_steps_per_second": 0.87, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_gsm8k_accuracy": 0.7709449909740977, |
|
"eval_gsm8k_loss": 0.8671875, |
|
"eval_gsm8k_runtime": 11.5578, |
|
"eval_gsm8k_samples_per_second": 129.35, |
|
"eval_gsm8k_steps_per_second": 1.384, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_ted_trans_en-hi_accuracy": 0.6727249123718032, |
|
"eval_ted_trans_en-hi_loss": 1.2490234375, |
|
"eval_ted_trans_en-hi_runtime": 3.3114, |
|
"eval_ted_trans_en-hi_samples_per_second": 31.105, |
|
"eval_ted_trans_en-hi_steps_per_second": 0.604, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_ted_trans_de-ja_accuracy": 0.6605206483545547, |
|
"eval_ted_trans_de-ja_loss": 1.4599609375, |
|
"eval_ted_trans_de-ja_runtime": 8.583, |
|
"eval_ted_trans_de-ja_samples_per_second": 83.654, |
|
"eval_ted_trans_de-ja_steps_per_second": 0.932, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_ted_trans_nl-en_accuracy": 0.757177992835374, |
|
"eval_ted_trans_nl-en_loss": 1.0478515625, |
|
"eval_ted_trans_nl-en_runtime": 8.7612, |
|
"eval_ted_trans_nl-en_samples_per_second": 88.002, |
|
"eval_ted_trans_nl-en_steps_per_second": 1.027, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_ted_trans_en-ja_accuracy": 0.6644915715062534, |
|
"eval_ted_trans_en-ja_loss": 1.3798828125, |
|
"eval_ted_trans_en-ja_runtime": 9.6809, |
|
"eval_ted_trans_en-ja_samples_per_second": 82.74, |
|
"eval_ted_trans_en-ja_steps_per_second": 0.93, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_ted_trans_en-es_accuracy": 0.7831230683487865, |
|
"eval_ted_trans_en-es_loss": 0.89501953125, |
|
"eval_ted_trans_en-es_runtime": 8.1422, |
|
"eval_ted_trans_en-es_samples_per_second": 101.447, |
|
"eval_ted_trans_en-es_steps_per_second": 1.105, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_ted_trans_en-ms_accuracy": 0.6917040358744395, |
|
"eval_ted_trans_en-ms_loss": 1.3955078125, |
|
"eval_ted_trans_en-ms_runtime": 0.7332, |
|
"eval_ted_trans_en-ms_samples_per_second": 57.285, |
|
"eval_ted_trans_en-ms_steps_per_second": 1.364, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_xsum_accuracy": 0.6225837900623918, |
|
"eval_xsum_loss": 1.4453125, |
|
"eval_xsum_runtime": 443.13, |
|
"eval_xsum_samples_per_second": 92.093, |
|
"eval_xsum_steps_per_second": 0.961, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_cnn_dailymail_accuracy": 0.6811569253551761, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 634.279, |
|
"eval_cnn_dailymail_samples_per_second": 90.533, |
|
"eval_cnn_dailymail_steps_per_second": 0.944, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_multi_news_accuracy": 0.5572843896862695, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 104.4536, |
|
"eval_multi_news_samples_per_second": 86.115, |
|
"eval_multi_news_steps_per_second": 0.9, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_tldr_news_accuracy": 0.5934825543120474, |
|
"eval_tldr_news_loss": 1.779296875, |
|
"eval_tldr_news_runtime": 7.875, |
|
"eval_tldr_news_samples_per_second": 181.334, |
|
"eval_tldr_news_steps_per_second": 1.905, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_scitldr_accuracy": 0.49756888168557534, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 5.4836, |
|
"eval_scitldr_samples_per_second": 72.763, |
|
"eval_scitldr_steps_per_second": 0.912, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_samsum_accuracy": 0.6441076667252498, |
|
"eval_samsum_loss": 1.3203125, |
|
"eval_samsum_runtime": 31.9228, |
|
"eval_samsum_samples_per_second": 92.317, |
|
"eval_samsum_steps_per_second": 0.971, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_debate_sum_accuracy": 0.9393930900916929, |
|
"eval_debate_sum_loss": 0.327392578125, |
|
"eval_debate_sum_runtime": 546.4353, |
|
"eval_debate_sum_samples_per_second": 88.051, |
|
"eval_debate_sum_steps_per_second": 0.919, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_billsum_accuracy": 0.6859647270039075, |
|
"eval_billsum_loss": 1.3212890625, |
|
"eval_billsum_runtime": 47.3064, |
|
"eval_billsum_samples_per_second": 80.116, |
|
"eval_billsum_steps_per_second": 0.846, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_wmt2019_zh-en_accuracy": 0.6666222464280335, |
|
"eval_wmt2019_zh-en_loss": 1.4609375, |
|
"eval_wmt2019_zh-en_runtime": 27.4142, |
|
"eval_wmt2019_zh-en_samples_per_second": 145.217, |
|
"eval_wmt2019_zh-en_steps_per_second": 1.532, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_wmt2019_ru-en_accuracy": 0.7586163428740916, |
|
"eval_wmt2019_ru-en_loss": 0.93212890625, |
|
"eval_wmt2019_ru-en_runtime": 22.6757, |
|
"eval_wmt2019_ru-en_samples_per_second": 132.3, |
|
"eval_wmt2019_ru-en_steps_per_second": 1.411, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_wmt2019_de-en_accuracy": 0.7644713185146496, |
|
"eval_wmt2019_de-en_loss": 0.92724609375, |
|
"eval_wmt2019_de-en_runtime": 15.456, |
|
"eval_wmt2019_de-en_samples_per_second": 193.97, |
|
"eval_wmt2019_de-en_steps_per_second": 2.07, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_wmt2019_fr-de_accuracy": 0.7478946231915353, |
|
"eval_wmt2019_fr-de_loss": 1.0068359375, |
|
"eval_wmt2019_fr-de_runtime": 10.3196, |
|
"eval_wmt2019_fr-de_samples_per_second": 146.518, |
|
"eval_wmt2019_fr-de_steps_per_second": 1.55, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_essay_instruction_accuracy": 0.6048415629215222, |
|
"eval_essay_instruction_loss": 1.8955078125, |
|
"eval_essay_instruction_runtime": 9.0231, |
|
"eval_essay_instruction_samples_per_second": 45.771, |
|
"eval_essay_instruction_steps_per_second": 0.554, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_reddit_eli5_accuracy": 0.4608785206404607, |
|
"eval_reddit_eli5_loss": 2.4296875, |
|
"eval_reddit_eli5_runtime": 597.6745, |
|
"eval_reddit_eli5_samples_per_second": 91.232, |
|
"eval_reddit_eli5_steps_per_second": 0.95, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_reddit_askh_accuracy": 0.4638486660338061, |
|
"eval_reddit_askh_loss": 2.5234375, |
|
"eval_reddit_askh_runtime": 248.9187, |
|
"eval_reddit_askh_samples_per_second": 79.162, |
|
"eval_reddit_askh_steps_per_second": 0.828, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_reddit_asks_accuracy": 0.4715865474658219, |
|
"eval_reddit_asks_loss": 2.384765625, |
|
"eval_reddit_asks_runtime": 310.7333, |
|
"eval_reddit_asks_samples_per_second": 84.819, |
|
"eval_reddit_asks_steps_per_second": 0.885, |
|
"step": 1000 |
|
} |
|
], |
|
"max_steps": 67822, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.7293861155088892e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|