|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8212414090449476, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.0000000000000004e-08, |
|
"loss": 2.1648, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.500000000000001e-08, |
|
"loss": 2.131, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.2500000000000002e-07, |
|
"loss": 1.9325, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.7500000000000002e-07, |
|
"loss": 1.8743, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.2500000000000002e-07, |
|
"loss": 1.8232, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.75e-07, |
|
"loss": 1.7315, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.25e-07, |
|
"loss": 1.656, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.75e-07, |
|
"loss": 1.6538, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.2500000000000006e-07, |
|
"loss": 1.5483, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.7500000000000006e-07, |
|
"loss": 1.5073, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.250000000000001e-07, |
|
"loss": 1.501, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.750000000000001e-07, |
|
"loss": 1.4804, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.25e-07, |
|
"loss": 1.4357, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.750000000000001e-07, |
|
"loss": 1.424, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.25e-07, |
|
"loss": 1.4579, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.750000000000001e-07, |
|
"loss": 1.4185, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.250000000000001e-07, |
|
"loss": 1.4141, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.75e-07, |
|
"loss": 1.4098, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.25e-07, |
|
"loss": 1.4144, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.7e-07, |
|
"loss": 1.3644, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.02e-06, |
|
"loss": 1.3524, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0700000000000001e-06, |
|
"loss": 1.3403, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.12e-06, |
|
"loss": 1.3355, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.1700000000000002e-06, |
|
"loss": 1.3448, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.2200000000000002e-06, |
|
"loss": 1.322, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.2700000000000001e-06, |
|
"loss": 1.3186, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.32e-06, |
|
"loss": 1.3038, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.3700000000000002e-06, |
|
"loss": 1.2853, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.42e-06, |
|
"loss": 1.2939, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.4700000000000001e-06, |
|
"loss": 1.2918, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.52e-06, |
|
"loss": 1.2976, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.5700000000000002e-06, |
|
"loss": 1.3128, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.6200000000000002e-06, |
|
"loss": 1.2433, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.6700000000000003e-06, |
|
"loss": 1.2978, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.72e-06, |
|
"loss": 1.2964, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.77e-06, |
|
"loss": 1.2625, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8200000000000002e-06, |
|
"loss": 1.2837, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.87e-06, |
|
"loss": 1.2995, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 1.2706, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.97e-06, |
|
"loss": 1.2819, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.02e-06, |
|
"loss": 1.2522, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.07e-06, |
|
"loss": 1.2955, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.12e-06, |
|
"loss": 1.2506, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.17e-06, |
|
"loss": 1.249, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.2200000000000003e-06, |
|
"loss": 1.2413, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.2700000000000003e-06, |
|
"loss": 1.2463, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.3200000000000002e-06, |
|
"loss": 1.288, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.37e-06, |
|
"loss": 1.2531, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.42e-06, |
|
"loss": 1.2314, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.47e-06, |
|
"loss": 1.2369, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_multi_news_accuracy": 0.5592306537314586, |
|
"eval_multi_news_loss": 1.919921875, |
|
"eval_multi_news_runtime": 374.7444, |
|
"eval_multi_news_samples_per_second": 15.002, |
|
"eval_multi_news_steps_per_second": 1.876, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_samsum_accuracy": 0.630043040249728, |
|
"eval_samsum_loss": 1.3271484375, |
|
"eval_samsum_runtime": 37.5336, |
|
"eval_samsum_samples_per_second": 21.794, |
|
"eval_samsum_steps_per_second": 2.744, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_billsum_accuracy": 0.6415100921468554, |
|
"eval_billsum_loss": 1.4970703125, |
|
"eval_billsum_runtime": 204.4286, |
|
"eval_billsum_samples_per_second": 15.991, |
|
"eval_billsum_steps_per_second": 2.001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_wmt2019_zh-en_accuracy": 0.5844479239374446, |
|
"eval_wmt2019_zh-en_loss": 1.89453125, |
|
"eval_wmt2019_zh-en_runtime": 43.2897, |
|
"eval_wmt2019_zh-en_samples_per_second": 23.1, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.888, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ted_trans_en-ja_accuracy": 0.5366497079329188, |
|
"eval_ted_trans_en-ja_loss": 2.01953125, |
|
"eval_ted_trans_en-ja_runtime": 36.4641, |
|
"eval_ted_trans_en-ja_samples_per_second": 21.967, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.77, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ted_trans_zh-ja_accuracy": 0.44175365344467643, |
|
"eval_ted_trans_zh-ja_loss": 2.703125, |
|
"eval_ted_trans_zh-ja_runtime": 2.2264, |
|
"eval_ted_trans_zh-ja_samples_per_second": 18.864, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.695, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_sharegpt_accuracy": 0.7056496488080175, |
|
"eval_sharegpt_loss": 1.1474609375, |
|
"eval_sharegpt_runtime": 735.7691, |
|
"eval_sharegpt_samples_per_second": 4.55, |
|
"eval_sharegpt_steps_per_second": 0.569, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_dolly15k_accuracy": 0.5961999725877193, |
|
"eval_dolly15k_loss": 1.6650390625, |
|
"eval_dolly15k_runtime": 33.9484, |
|
"eval_dolly15k_samples_per_second": 22.122, |
|
"eval_dolly15k_steps_per_second": 2.769, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_ikala_accuracy": 0.7306054447586751, |
|
"eval_ikala_loss": 1.0380859375, |
|
"eval_ikala_runtime": 887.5903, |
|
"eval_ikala_samples_per_second": 16.005, |
|
"eval_ikala_steps_per_second": 2.001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_oasst_export_accuracy": 0.656822117898619, |
|
"eval_oasst_export_loss": 1.60546875, |
|
"eval_oasst_export_runtime": 134.1688, |
|
"eval_oasst_export_samples_per_second": 15.644, |
|
"eval_oasst_export_steps_per_second": 1.96, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_joke_accuracy": 0.48218347232752085, |
|
"eval_joke_loss": 2.29296875, |
|
"eval_joke_runtime": 3.5706, |
|
"eval_joke_samples_per_second": 21.285, |
|
"eval_joke_steps_per_second": 2.801, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_gsm8k_accuracy": 0.7402563310685608, |
|
"eval_gsm8k_loss": 1.0068359375, |
|
"eval_gsm8k_runtime": 56.8505, |
|
"eval_gsm8k_samples_per_second": 23.201, |
|
"eval_gsm8k_steps_per_second": 2.902, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_webgpt_accuracy": 0.4973525539337287, |
|
"eval_webgpt_loss": 2.21484375, |
|
"eval_webgpt_runtime": 155.091, |
|
"eval_webgpt_samples_per_second": 22.974, |
|
"eval_webgpt_steps_per_second": 2.876, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.52e-06, |
|
"loss": 1.2409, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.5700000000000004e-06, |
|
"loss": 1.2076, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.6200000000000003e-06, |
|
"loss": 1.2425, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.6700000000000003e-06, |
|
"loss": 1.267, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7200000000000002e-06, |
|
"loss": 1.238, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7700000000000006e-06, |
|
"loss": 1.2176, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.82e-06, |
|
"loss": 1.2168, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.87e-06, |
|
"loss": 1.2262, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.92e-06, |
|
"loss": 1.2125, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.97e-06, |
|
"loss": 1.2092, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.0200000000000003e-06, |
|
"loss": 1.2521, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.0700000000000003e-06, |
|
"loss": 1.2297, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.12e-06, |
|
"loss": 1.2317, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.17e-06, |
|
"loss": 1.2225, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.2200000000000005e-06, |
|
"loss": 1.2227, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.2700000000000005e-06, |
|
"loss": 1.2172, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.3200000000000004e-06, |
|
"loss": 1.22, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.3700000000000003e-06, |
|
"loss": 1.2164, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.4200000000000007e-06, |
|
"loss": 1.2045, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.4700000000000002e-06, |
|
"loss": 1.2334, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.52e-06, |
|
"loss": 1.1979, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.57e-06, |
|
"loss": 1.2066, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.62e-06, |
|
"loss": 1.2153, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.6700000000000004e-06, |
|
"loss": 1.2246, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.7200000000000004e-06, |
|
"loss": 1.2027, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.7700000000000003e-06, |
|
"loss": 1.233, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.820000000000001e-06, |
|
"loss": 1.2156, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.87e-06, |
|
"loss": 1.2067, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 1.2077, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.97e-06, |
|
"loss": 1.184, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.0200000000000005e-06, |
|
"loss": 1.1747, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.07e-06, |
|
"loss": 1.2055, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.12e-06, |
|
"loss": 1.2137, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.17e-06, |
|
"loss": 1.1934, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.22e-06, |
|
"loss": 1.2154, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.270000000000001e-06, |
|
"loss": 1.2216, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.32e-06, |
|
"loss": 1.2002, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3700000000000005e-06, |
|
"loss": 1.1698, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.42e-06, |
|
"loss": 1.2006, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.47e-06, |
|
"loss": 1.1706, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.520000000000001e-06, |
|
"loss": 1.1898, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.57e-06, |
|
"loss": 1.1941, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.620000000000001e-06, |
|
"loss": 1.1978, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.670000000000001e-06, |
|
"loss": 1.1871, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7200000000000005e-06, |
|
"loss": 1.1673, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.77e-06, |
|
"loss": 1.1938, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.8200000000000004e-06, |
|
"loss": 1.1601, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.87e-06, |
|
"loss": 1.1815, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.92e-06, |
|
"loss": 1.1985, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.970000000000001e-06, |
|
"loss": 1.1755, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_multi_news_accuracy": 0.5616533126883595, |
|
"eval_multi_news_loss": 1.9033203125, |
|
"eval_multi_news_runtime": 374.666, |
|
"eval_multi_news_samples_per_second": 15.005, |
|
"eval_multi_news_steps_per_second": 1.876, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_samsum_accuracy": 0.6358605685096722, |
|
"eval_samsum_loss": 1.2763671875, |
|
"eval_samsum_runtime": 36.4854, |
|
"eval_samsum_samples_per_second": 22.42, |
|
"eval_samsum_steps_per_second": 2.823, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_billsum_accuracy": 0.645555269329641, |
|
"eval_billsum_loss": 1.466796875, |
|
"eval_billsum_runtime": 205.3486, |
|
"eval_billsum_samples_per_second": 15.919, |
|
"eval_billsum_steps_per_second": 1.992, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_wmt2019_zh-en_accuracy": 0.5821662271706222, |
|
"eval_wmt2019_zh-en_loss": 1.908203125, |
|
"eval_wmt2019_zh-en_runtime": 42.6249, |
|
"eval_wmt2019_zh-en_samples_per_second": 23.46, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.933, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ted_trans_en-ja_accuracy": 0.5513235961740165, |
|
"eval_ted_trans_en-ja_loss": 1.9208984375, |
|
"eval_ted_trans_en-ja_runtime": 35.6003, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.5, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.837, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ted_trans_zh-ja_accuracy": 0.4552332912988651, |
|
"eval_ted_trans_zh-ja_loss": 2.595703125, |
|
"eval_ted_trans_zh-ja_runtime": 2.6463, |
|
"eval_ted_trans_zh-ja_samples_per_second": 15.871, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.267, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_sharegpt_accuracy": 0.7199542010473684, |
|
"eval_sharegpt_loss": 1.0751953125, |
|
"eval_sharegpt_runtime": 733.0519, |
|
"eval_sharegpt_samples_per_second": 4.567, |
|
"eval_sharegpt_steps_per_second": 0.572, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_dolly15k_accuracy": 0.5963712993421053, |
|
"eval_dolly15k_loss": 1.6484375, |
|
"eval_dolly15k_runtime": 33.8269, |
|
"eval_dolly15k_samples_per_second": 22.201, |
|
"eval_dolly15k_steps_per_second": 2.779, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_ikala_accuracy": 0.7374268761235112, |
|
"eval_ikala_loss": 0.98876953125, |
|
"eval_ikala_runtime": 886.0533, |
|
"eval_ikala_samples_per_second": 16.033, |
|
"eval_ikala_steps_per_second": 2.004, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_oasst_export_accuracy": 0.6594323119298394, |
|
"eval_oasst_export_loss": 1.580078125, |
|
"eval_oasst_export_runtime": 134.3333, |
|
"eval_oasst_export_samples_per_second": 15.625, |
|
"eval_oasst_export_steps_per_second": 1.958, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_joke_accuracy": 0.4916603487490523, |
|
"eval_joke_loss": 2.20703125, |
|
"eval_joke_runtime": 3.5959, |
|
"eval_joke_samples_per_second": 21.135, |
|
"eval_joke_steps_per_second": 2.781, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_gsm8k_accuracy": 0.760284126003706, |
|
"eval_gsm8k_loss": 0.89794921875, |
|
"eval_gsm8k_runtime": 57.2198, |
|
"eval_gsm8k_samples_per_second": 23.051, |
|
"eval_gsm8k_steps_per_second": 2.884, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_webgpt_accuracy": 0.4994055667344498, |
|
"eval_webgpt_loss": 2.18359375, |
|
"eval_webgpt_runtime": 155.137, |
|
"eval_webgpt_samples_per_second": 22.967, |
|
"eval_webgpt_steps_per_second": 2.875, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.02e-06, |
|
"loss": 1.1772, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.070000000000001e-06, |
|
"loss": 1.2069, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.12e-06, |
|
"loss": 1.1755, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.1700000000000005e-06, |
|
"loss": 1.1658, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.220000000000001e-06, |
|
"loss": 1.1896, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 5.27e-06, |
|
"loss": 1.1743, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.320000000000001e-06, |
|
"loss": 1.1444, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.370000000000001e-06, |
|
"loss": 1.1812, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.420000000000001e-06, |
|
"loss": 1.1549, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.470000000000001e-06, |
|
"loss": 1.1929, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.5200000000000005e-06, |
|
"loss": 1.1317, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 5.570000000000001e-06, |
|
"loss": 1.1531, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.620000000000001e-06, |
|
"loss": 1.1871, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.67e-06, |
|
"loss": 1.1507, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.72e-06, |
|
"loss": 1.1916, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.77e-06, |
|
"loss": 1.1532, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.82e-06, |
|
"loss": 1.1763, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.8700000000000005e-06, |
|
"loss": 1.1719, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5.92e-06, |
|
"loss": 1.1784, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5.9700000000000004e-06, |
|
"loss": 1.1597, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 6.02e-06, |
|
"loss": 1.1594, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 6.07e-06, |
|
"loss": 1.1769, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 6.120000000000001e-06, |
|
"loss": 1.1692, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 6.17e-06, |
|
"loss": 1.1327, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 6.220000000000001e-06, |
|
"loss": 1.1733, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 6.27e-06, |
|
"loss": 1.16, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 6.3200000000000005e-06, |
|
"loss": 1.1701, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 6.370000000000001e-06, |
|
"loss": 1.1649, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 6.42e-06, |
|
"loss": 1.1477, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 6.470000000000001e-06, |
|
"loss": 1.1498, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 6.520000000000001e-06, |
|
"loss": 1.1881, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 6.570000000000001e-06, |
|
"loss": 1.1414, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 6.620000000000001e-06, |
|
"loss": 1.1663, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 6.6700000000000005e-06, |
|
"loss": 1.1555, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 6.720000000000001e-06, |
|
"loss": 1.1652, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 6.770000000000001e-06, |
|
"loss": 1.1539, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.820000000000001e-06, |
|
"loss": 1.1633, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.870000000000001e-06, |
|
"loss": 1.1583, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.92e-06, |
|
"loss": 1.1404, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.97e-06, |
|
"loss": 1.1436, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.0200000000000006e-06, |
|
"loss": 1.1856, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.07e-06, |
|
"loss": 1.1587, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.1200000000000004e-06, |
|
"loss": 1.1296, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.17e-06, |
|
"loss": 1.1171, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.22e-06, |
|
"loss": 1.1459, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.270000000000001e-06, |
|
"loss": 1.1621, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.32e-06, |
|
"loss": 1.1345, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.370000000000001e-06, |
|
"loss": 1.1711, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.420000000000001e-06, |
|
"loss": 1.1852, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.4700000000000005e-06, |
|
"loss": 1.1361, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_multi_news_accuracy": 0.5626650769023163, |
|
"eval_multi_news_loss": 1.9013671875, |
|
"eval_multi_news_runtime": 374.2125, |
|
"eval_multi_news_samples_per_second": 15.024, |
|
"eval_multi_news_steps_per_second": 1.879, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_samsum_accuracy": 0.641110533036939, |
|
"eval_samsum_loss": 1.267578125, |
|
"eval_samsum_runtime": 37.1994, |
|
"eval_samsum_samples_per_second": 21.99, |
|
"eval_samsum_steps_per_second": 2.769, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_billsum_accuracy": 0.648249370750216, |
|
"eval_billsum_loss": 1.453125, |
|
"eval_billsum_runtime": 204.445, |
|
"eval_billsum_samples_per_second": 15.99, |
|
"eval_billsum_steps_per_second": 2.001, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_wmt2019_zh-en_accuracy": 0.5873898487705391, |
|
"eval_wmt2019_zh-en_loss": 1.892578125, |
|
"eval_wmt2019_zh-en_runtime": 43.8258, |
|
"eval_wmt2019_zh-en_samples_per_second": 22.818, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.852, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ted_trans_en-ja_accuracy": 0.5575474107655961, |
|
"eval_ted_trans_en-ja_loss": 1.8818359375, |
|
"eval_ted_trans_en-ja_runtime": 35.7188, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.425, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.828, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ted_trans_zh-ja_accuracy": 0.45999153259949194, |
|
"eval_ted_trans_zh-ja_loss": 2.556640625, |
|
"eval_ted_trans_zh-ja_runtime": 2.58, |
|
"eval_ted_trans_zh-ja_samples_per_second": 16.279, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.326, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_sharegpt_accuracy": 0.7297610954662402, |
|
"eval_sharegpt_loss": 1.0302734375, |
|
"eval_sharegpt_runtime": 732.545, |
|
"eval_sharegpt_samples_per_second": 4.57, |
|
"eval_sharegpt_steps_per_second": 0.572, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_dolly15k_accuracy": 0.5962685032894737, |
|
"eval_dolly15k_loss": 1.646484375, |
|
"eval_dolly15k_runtime": 33.5813, |
|
"eval_dolly15k_samples_per_second": 22.364, |
|
"eval_dolly15k_steps_per_second": 2.799, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_ikala_accuracy": 0.7406414384414164, |
|
"eval_ikala_loss": 0.96875, |
|
"eval_ikala_runtime": 885.454, |
|
"eval_ikala_samples_per_second": 16.044, |
|
"eval_ikala_steps_per_second": 2.006, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_oasst_export_accuracy": 0.6599712470813749, |
|
"eval_oasst_export_loss": 1.578125, |
|
"eval_oasst_export_runtime": 133.2511, |
|
"eval_oasst_export_samples_per_second": 15.752, |
|
"eval_oasst_export_steps_per_second": 1.974, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_joke_accuracy": 0.49838893100833964, |
|
"eval_joke_loss": 2.1953125, |
|
"eval_joke_runtime": 4.5928, |
|
"eval_joke_samples_per_second": 16.548, |
|
"eval_joke_steps_per_second": 2.177, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_gsm8k_accuracy": 0.7668082149474984, |
|
"eval_gsm8k_loss": 0.85791015625, |
|
"eval_gsm8k_runtime": 57.7515, |
|
"eval_gsm8k_samples_per_second": 22.839, |
|
"eval_gsm8k_steps_per_second": 2.857, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_webgpt_accuracy": 0.4995741373619939, |
|
"eval_webgpt_loss": 2.181640625, |
|
"eval_webgpt_runtime": 154.199, |
|
"eval_webgpt_samples_per_second": 23.107, |
|
"eval_webgpt_steps_per_second": 2.892, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.520000000000001e-06, |
|
"loss": 1.1574, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.57e-06, |
|
"loss": 1.1593, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.620000000000001e-06, |
|
"loss": 1.1255, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.670000000000001e-06, |
|
"loss": 1.1665, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.72e-06, |
|
"loss": 1.1459, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.77e-06, |
|
"loss": 1.1187, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.820000000000001e-06, |
|
"loss": 1.1469, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.870000000000001e-06, |
|
"loss": 1.1648, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.92e-06, |
|
"loss": 1.1314, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.970000000000002e-06, |
|
"loss": 1.1213, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.020000000000001e-06, |
|
"loss": 1.1424, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.07e-06, |
|
"loss": 1.1637, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.120000000000002e-06, |
|
"loss": 1.1403, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.17e-06, |
|
"loss": 1.1299, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.220000000000001e-06, |
|
"loss": 1.1361, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.27e-06, |
|
"loss": 1.1484, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.32e-06, |
|
"loss": 1.1292, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.370000000000001e-06, |
|
"loss": 1.1395, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.42e-06, |
|
"loss": 1.1299, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.47e-06, |
|
"loss": 1.145, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.52e-06, |
|
"loss": 1.1351, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.570000000000001e-06, |
|
"loss": 1.1579, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.62e-06, |
|
"loss": 1.1483, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.67e-06, |
|
"loss": 1.1278, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.720000000000001e-06, |
|
"loss": 1.1375, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.77e-06, |
|
"loss": 1.1526, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.82e-06, |
|
"loss": 1.1535, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.870000000000001e-06, |
|
"loss": 1.1377, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 1.1578, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.97e-06, |
|
"loss": 1.1598, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.020000000000002e-06, |
|
"loss": 1.1601, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.070000000000001e-06, |
|
"loss": 1.1292, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.12e-06, |
|
"loss": 1.111, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.17e-06, |
|
"loss": 1.12, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.220000000000002e-06, |
|
"loss": 1.1, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.270000000000001e-06, |
|
"loss": 1.099, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.32e-06, |
|
"loss": 1.1333, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.370000000000002e-06, |
|
"loss": 1.1386, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.42e-06, |
|
"loss": 1.1389, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.47e-06, |
|
"loss": 1.1294, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.52e-06, |
|
"loss": 1.1326, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.57e-06, |
|
"loss": 1.129, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.620000000000001e-06, |
|
"loss": 1.1224, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.67e-06, |
|
"loss": 1.1168, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.72e-06, |
|
"loss": 1.1223, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.770000000000001e-06, |
|
"loss": 1.1064, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.820000000000001e-06, |
|
"loss": 1.1303, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.87e-06, |
|
"loss": 1.1134, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.920000000000002e-06, |
|
"loss": 1.1396, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.970000000000001e-06, |
|
"loss": 1.1418, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_multi_news_accuracy": 0.5614524803428215, |
|
"eval_multi_news_loss": 1.9052734375, |
|
"eval_multi_news_runtime": 373.3978, |
|
"eval_multi_news_samples_per_second": 15.056, |
|
"eval_multi_news_steps_per_second": 1.883, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_samsum_accuracy": 0.6388875750839521, |
|
"eval_samsum_loss": 1.265625, |
|
"eval_samsum_runtime": 37.3723, |
|
"eval_samsum_samples_per_second": 21.888, |
|
"eval_samsum_steps_per_second": 2.756, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_billsum_accuracy": 0.6493294263495999, |
|
"eval_billsum_loss": 1.4462890625, |
|
"eval_billsum_runtime": 203.77, |
|
"eval_billsum_samples_per_second": 16.043, |
|
"eval_billsum_steps_per_second": 2.007, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_wmt2019_zh-en_accuracy": 0.5823181343543334, |
|
"eval_wmt2019_zh-en_loss": 1.9228515625, |
|
"eval_wmt2019_zh-en_runtime": 43.5037, |
|
"eval_wmt2019_zh-en_samples_per_second": 22.987, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.873, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ted_trans_en-ja_accuracy": 0.5623202978930665, |
|
"eval_ted_trans_en-ja_loss": 1.869140625, |
|
"eval_ted_trans_en-ja_runtime": 35.4889, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.57, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.846, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ted_trans_zh-ja_accuracy": 0.46688327918020495, |
|
"eval_ted_trans_zh-ja_loss": 2.46875, |
|
"eval_ted_trans_zh-ja_runtime": 2.6642, |
|
"eval_ted_trans_zh-ja_samples_per_second": 15.765, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.252, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_sharegpt_accuracy": 0.7361997474453208, |
|
"eval_sharegpt_loss": 1.001953125, |
|
"eval_sharegpt_runtime": 732.4255, |
|
"eval_sharegpt_samples_per_second": 4.571, |
|
"eval_sharegpt_steps_per_second": 0.572, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_dolly15k_accuracy": 0.5939898574561403, |
|
"eval_dolly15k_loss": 1.65625, |
|
"eval_dolly15k_runtime": 33.8567, |
|
"eval_dolly15k_samples_per_second": 22.182, |
|
"eval_dolly15k_steps_per_second": 2.776, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_ikala_accuracy": 0.7422763555784087, |
|
"eval_ikala_loss": 0.9580078125, |
|
"eval_ikala_runtime": 885.2845, |
|
"eval_ikala_samples_per_second": 16.047, |
|
"eval_ikala_steps_per_second": 2.006, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_oasst_export_accuracy": 0.6593580262738169, |
|
"eval_oasst_export_loss": 1.578125, |
|
"eval_oasst_export_runtime": 132.7253, |
|
"eval_oasst_export_samples_per_second": 15.815, |
|
"eval_oasst_export_steps_per_second": 1.982, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_joke_accuracy": 0.49895754359363154, |
|
"eval_joke_loss": 2.171875, |
|
"eval_joke_runtime": 4.5049, |
|
"eval_joke_samples_per_second": 16.871, |
|
"eval_joke_steps_per_second": 2.22, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_gsm8k_accuracy": 0.775555898702903, |
|
"eval_gsm8k_loss": 0.8232421875, |
|
"eval_gsm8k_runtime": 56.3886, |
|
"eval_gsm8k_samples_per_second": 23.391, |
|
"eval_gsm8k_steps_per_second": 2.926, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_webgpt_accuracy": 0.4990524556304364, |
|
"eval_webgpt_loss": 2.185546875, |
|
"eval_webgpt_runtime": 154.0524, |
|
"eval_webgpt_samples_per_second": 23.128, |
|
"eval_webgpt_steps_per_second": 2.895, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.99914354230901e-06, |
|
"loss": 1.116, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.997002398081536e-06, |
|
"loss": 1.1243, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.99486125385406e-06, |
|
"loss": 1.1183, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.992720109626585e-06, |
|
"loss": 1.1324, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.99057896539911e-06, |
|
"loss": 1.1033, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.988437821171634e-06, |
|
"loss": 1.0962, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.98629667694416e-06, |
|
"loss": 1.1253, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.984155532716685e-06, |
|
"loss": 1.1522, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.98201438848921e-06, |
|
"loss": 1.142, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.979873244261734e-06, |
|
"loss": 1.1289, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.97773210003426e-06, |
|
"loss": 1.1367, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.975590955806785e-06, |
|
"loss": 1.1303, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.973449811579308e-06, |
|
"loss": 1.1041, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.971308667351834e-06, |
|
"loss": 1.1325, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.969167523124359e-06, |
|
"loss": 1.1371, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.967026378896883e-06, |
|
"loss": 1.112, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.964885234669408e-06, |
|
"loss": 1.1172, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.962744090441932e-06, |
|
"loss": 1.0959, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.960602946214459e-06, |
|
"loss": 1.1322, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.958461801986983e-06, |
|
"loss": 1.1098, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.956320657759508e-06, |
|
"loss": 1.1185, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.954179513532032e-06, |
|
"loss": 1.1027, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.952038369304557e-06, |
|
"loss": 1.1217, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.949897225077082e-06, |
|
"loss": 1.115, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.947756080849606e-06, |
|
"loss": 1.1197, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.945614936622131e-06, |
|
"loss": 1.0926, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.943473792394657e-06, |
|
"loss": 1.1085, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.94133264816718e-06, |
|
"loss": 1.139, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.939191503939706e-06, |
|
"loss": 1.1131, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.937050359712231e-06, |
|
"loss": 1.1281, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.934909215484757e-06, |
|
"loss": 1.0962, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.93276807125728e-06, |
|
"loss": 1.107, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.930626927029806e-06, |
|
"loss": 1.1082, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.928485782802331e-06, |
|
"loss": 1.1323, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.926344638574855e-06, |
|
"loss": 1.0984, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.92420349434738e-06, |
|
"loss": 1.118, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.922062350119905e-06, |
|
"loss": 1.1003, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.919921205892429e-06, |
|
"loss": 1.115, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.917780061664954e-06, |
|
"loss": 1.0974, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.915638917437478e-06, |
|
"loss": 1.107, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.913497773210005e-06, |
|
"loss": 1.1101, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.911356628982529e-06, |
|
"loss": 1.1115, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.909215484755054e-06, |
|
"loss": 1.0951, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.907074340527578e-06, |
|
"loss": 1.0938, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.904933196300103e-06, |
|
"loss": 1.0772, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.902792052072629e-06, |
|
"loss": 1.1028, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.900650907845152e-06, |
|
"loss": 1.0923, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.898509763617678e-06, |
|
"loss": 1.1238, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.896368619390203e-06, |
|
"loss": 1.1401, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.894227475162728e-06, |
|
"loss": 1.1142, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_multi_news_accuracy": 0.5619751512736382, |
|
"eval_multi_news_loss": 1.904296875, |
|
"eval_multi_news_runtime": 373.6389, |
|
"eval_multi_news_samples_per_second": 15.047, |
|
"eval_multi_news_steps_per_second": 1.881, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_samsum_accuracy": 0.6433334909899258, |
|
"eval_samsum_loss": 1.2607421875, |
|
"eval_samsum_runtime": 37.306, |
|
"eval_samsum_samples_per_second": 21.927, |
|
"eval_samsum_steps_per_second": 2.761, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_billsum_accuracy": 0.650991772793869, |
|
"eval_billsum_loss": 1.439453125, |
|
"eval_billsum_runtime": 203.4152, |
|
"eval_billsum_samples_per_second": 16.071, |
|
"eval_billsum_steps_per_second": 2.011, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_wmt2019_zh-en_accuracy": 0.5893870117057808, |
|
"eval_wmt2019_zh-en_loss": 1.904296875, |
|
"eval_wmt2019_zh-en_runtime": 43.0674, |
|
"eval_wmt2019_zh-en_samples_per_second": 23.219, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.902, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_ted_trans_en-ja_accuracy": 0.563225558860213, |
|
"eval_ted_trans_en-ja_loss": 1.8388671875, |
|
"eval_ted_trans_en-ja_runtime": 35.7494, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.406, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.825, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_ted_trans_zh-ja_accuracy": 0.4661596958174905, |
|
"eval_ted_trans_zh-ja_loss": 2.453125, |
|
"eval_ted_trans_zh-ja_runtime": 2.525, |
|
"eval_ted_trans_zh-ja_samples_per_second": 16.634, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.376, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_sharegpt_accuracy": 0.7423548548826656, |
|
"eval_sharegpt_loss": 0.97265625, |
|
"eval_sharegpt_runtime": 732.2894, |
|
"eval_sharegpt_samples_per_second": 4.572, |
|
"eval_sharegpt_steps_per_second": 0.572, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_dolly15k_accuracy": 0.5930646929824561, |
|
"eval_dolly15k_loss": 1.6513671875, |
|
"eval_dolly15k_runtime": 33.3723, |
|
"eval_dolly15k_samples_per_second": 22.504, |
|
"eval_dolly15k_steps_per_second": 2.817, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_ikala_accuracy": 0.7440914978067725, |
|
"eval_ikala_loss": 0.9453125, |
|
"eval_ikala_runtime": 884.831, |
|
"eval_ikala_samples_per_second": 16.055, |
|
"eval_ikala_steps_per_second": 2.007, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_oasst_export_accuracy": 0.6600834038561538, |
|
"eval_oasst_export_loss": 1.5791015625, |
|
"eval_oasst_export_runtime": 133.5652, |
|
"eval_oasst_export_samples_per_second": 15.715, |
|
"eval_oasst_export_steps_per_second": 1.969, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_joke_accuracy": 0.5195223654283548, |
|
"eval_joke_loss": 2.078125, |
|
"eval_joke_runtime": 4.5929, |
|
"eval_joke_samples_per_second": 16.547, |
|
"eval_joke_steps_per_second": 2.177, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_gsm8k_accuracy": 0.782682211241507, |
|
"eval_gsm8k_loss": 0.796875, |
|
"eval_gsm8k_runtime": 56.5404, |
|
"eval_gsm8k_samples_per_second": 23.328, |
|
"eval_gsm8k_steps_per_second": 2.918, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_webgpt_accuracy": 0.49846334564786127, |
|
"eval_webgpt_loss": 2.189453125, |
|
"eval_webgpt_runtime": 154.9389, |
|
"eval_webgpt_samples_per_second": 22.996, |
|
"eval_webgpt_steps_per_second": 2.879, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.892086330935252e-06, |
|
"loss": 1.1335, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.889945186707778e-06, |
|
"loss": 1.0999, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.887804042480303e-06, |
|
"loss": 1.1324, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.885662898252827e-06, |
|
"loss": 1.0832, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.883521754025352e-06, |
|
"loss": 1.1, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.881380609797877e-06, |
|
"loss": 1.1153, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.879239465570401e-06, |
|
"loss": 1.0958, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.877098321342926e-06, |
|
"loss": 1.1154, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.874957177115452e-06, |
|
"loss": 1.1139, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.872816032887977e-06, |
|
"loss": 1.0739, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.8706748886605e-06, |
|
"loss": 1.1105, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.868533744433026e-06, |
|
"loss": 1.0969, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.866392600205552e-06, |
|
"loss": 1.1207, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.864251455978075e-06, |
|
"loss": 1.1392, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.8621103117506e-06, |
|
"loss": 1.1161, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.859969167523126e-06, |
|
"loss": 1.0767, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.85782802329565e-06, |
|
"loss": 1.1113, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.855686879068175e-06, |
|
"loss": 1.0801, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.853545734840699e-06, |
|
"loss": 1.0835, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.851404590613226e-06, |
|
"loss": 1.0635, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.84926344638575e-06, |
|
"loss": 1.095, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.847122302158275e-06, |
|
"loss": 1.0822, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.844981157930798e-06, |
|
"loss": 1.0983, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.842840013703324e-06, |
|
"loss": 1.1245, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.84069886947585e-06, |
|
"loss": 1.0768, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.838557725248373e-06, |
|
"loss": 1.0958, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.836416581020898e-06, |
|
"loss": 1.0869, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.834275436793424e-06, |
|
"loss": 1.126, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.832134292565947e-06, |
|
"loss": 1.0823, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.829993148338473e-06, |
|
"loss": 1.1057, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.827852004110998e-06, |
|
"loss": 1.0717, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.825710859883523e-06, |
|
"loss": 1.0835, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.823569715656047e-06, |
|
"loss": 1.1291, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.821428571428573e-06, |
|
"loss": 1.0856, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.819287427201098e-06, |
|
"loss": 1.0972, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.817146282973622e-06, |
|
"loss": 1.0833, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.815005138746147e-06, |
|
"loss": 1.1124, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.812863994518672e-06, |
|
"loss": 1.0905, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.810722850291196e-06, |
|
"loss": 1.0891, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.808581706063721e-06, |
|
"loss": 1.0931, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.806440561836245e-06, |
|
"loss": 1.1066, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.804299417608772e-06, |
|
"loss": 1.0759, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.802158273381296e-06, |
|
"loss": 1.0996, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.800017129153821e-06, |
|
"loss": 1.0868, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.797875984926345e-06, |
|
"loss": 1.0799, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.79573484069887e-06, |
|
"loss": 1.0989, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.793593696471396e-06, |
|
"loss": 1.0841, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.79145255224392e-06, |
|
"loss": 1.0745, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.789311408016445e-06, |
|
"loss": 1.0742, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.78717026378897e-06, |
|
"loss": 1.0745, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_multi_news_accuracy": 0.5619935239487821, |
|
"eval_multi_news_loss": 1.9033203125, |
|
"eval_multi_news_runtime": 373.8934, |
|
"eval_multi_news_samples_per_second": 15.036, |
|
"eval_multi_news_steps_per_second": 1.88, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_samsum_accuracy": 0.6469753582746063, |
|
"eval_samsum_loss": 1.24609375, |
|
"eval_samsum_runtime": 37.2777, |
|
"eval_samsum_samples_per_second": 21.943, |
|
"eval_samsum_steps_per_second": 2.763, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_billsum_accuracy": 0.6516022390022165, |
|
"eval_billsum_loss": 1.431640625, |
|
"eval_billsum_runtime": 204.7394, |
|
"eval_billsum_samples_per_second": 15.967, |
|
"eval_billsum_steps_per_second": 1.998, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_wmt2019_zh-en_accuracy": 0.5844544095665172, |
|
"eval_wmt2019_zh-en_loss": 1.8984375, |
|
"eval_wmt2019_zh-en_runtime": 42.4024, |
|
"eval_wmt2019_zh-en_samples_per_second": 23.584, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.948, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_ted_trans_en-ja_accuracy": 0.577170182658057, |
|
"eval_ted_trans_en-ja_loss": 1.7958984375, |
|
"eval_ted_trans_en-ja_runtime": 35.5789, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.513, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.839, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_ted_trans_zh-ja_accuracy": 0.46690610569522834, |
|
"eval_ted_trans_zh-ja_loss": 2.515625, |
|
"eval_ted_trans_zh-ja_runtime": 2.5484, |
|
"eval_ted_trans_zh-ja_samples_per_second": 16.481, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.354, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_sharegpt_accuracy": 0.7475412115956699, |
|
"eval_sharegpt_loss": 0.9453125, |
|
"eval_sharegpt_runtime": 731.2857, |
|
"eval_sharegpt_samples_per_second": 4.578, |
|
"eval_sharegpt_steps_per_second": 0.573, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_dolly15k_accuracy": 0.5928933662280702, |
|
"eval_dolly15k_loss": 1.65625, |
|
"eval_dolly15k_runtime": 34.5989, |
|
"eval_dolly15k_samples_per_second": 21.706, |
|
"eval_dolly15k_steps_per_second": 2.717, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_ikala_accuracy": 0.7469942144047141, |
|
"eval_ikala_loss": 0.9296875, |
|
"eval_ikala_runtime": 884.7774, |
|
"eval_ikala_samples_per_second": 16.056, |
|
"eval_ikala_steps_per_second": 2.007, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_oasst_export_accuracy": 0.6607141036415994, |
|
"eval_oasst_export_loss": 1.5732421875, |
|
"eval_oasst_export_runtime": 132.9167, |
|
"eval_oasst_export_samples_per_second": 15.792, |
|
"eval_oasst_export_steps_per_second": 1.979, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_joke_accuracy": 0.5242608036391205, |
|
"eval_joke_loss": 2.025390625, |
|
"eval_joke_runtime": 4.5573, |
|
"eval_joke_samples_per_second": 16.677, |
|
"eval_joke_steps_per_second": 2.194, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_gsm8k_accuracy": 0.7849444101297097, |
|
"eval_gsm8k_loss": 0.783203125, |
|
"eval_gsm8k_runtime": 56.5634, |
|
"eval_gsm8k_samples_per_second": 23.319, |
|
"eval_gsm8k_steps_per_second": 2.917, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_webgpt_accuracy": 0.498837749883775, |
|
"eval_webgpt_loss": 2.19140625, |
|
"eval_webgpt_runtime": 153.8546, |
|
"eval_webgpt_samples_per_second": 23.158, |
|
"eval_webgpt_steps_per_second": 2.899, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.785029119561494e-06, |
|
"loss": 1.0876, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.782887975334019e-06, |
|
"loss": 1.0761, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.780746831106544e-06, |
|
"loss": 1.103, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.77860568687907e-06, |
|
"loss": 1.0891, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.776464542651593e-06, |
|
"loss": 1.0852, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.774323398424119e-06, |
|
"loss": 1.1041, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.772182254196644e-06, |
|
"loss": 1.0801, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.770041109969168e-06, |
|
"loss": 1.0851, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.767899965741693e-06, |
|
"loss": 1.0839, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.765758821514219e-06, |
|
"loss": 1.0756, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.763617677286742e-06, |
|
"loss": 1.0604, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.761476533059268e-06, |
|
"loss": 1.0613, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.759335388831791e-06, |
|
"loss": 1.0839, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.757194244604318e-06, |
|
"loss": 1.0873, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.755053100376842e-06, |
|
"loss": 1.0935, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.752911956149367e-06, |
|
"loss": 1.0821, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.750770811921891e-06, |
|
"loss": 1.0679, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.748629667694417e-06, |
|
"loss": 1.0939, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.746488523466942e-06, |
|
"loss": 1.0764, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.744347379239466e-06, |
|
"loss": 1.0772, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.742206235011991e-06, |
|
"loss": 1.0983, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.740065090784516e-06, |
|
"loss": 1.0649, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.73792394655704e-06, |
|
"loss": 1.0829, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.735782802329565e-06, |
|
"loss": 1.0914, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.73364165810209e-06, |
|
"loss": 1.0776, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.731500513874616e-06, |
|
"loss": 1.0698, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.72935936964714e-06, |
|
"loss": 1.074, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.727218225419665e-06, |
|
"loss": 1.0951, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.72507708119219e-06, |
|
"loss": 1.0586, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.722935936964714e-06, |
|
"loss": 1.066, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.72079479273724e-06, |
|
"loss": 1.0897, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.718653648509765e-06, |
|
"loss": 1.079, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.716512504282289e-06, |
|
"loss": 1.063, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.714371360054814e-06, |
|
"loss": 1.0688, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.712230215827338e-06, |
|
"loss": 1.0845, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.710089071599865e-06, |
|
"loss": 1.0421, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.707947927372388e-06, |
|
"loss": 1.0735, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.705806783144914e-06, |
|
"loss": 1.0848, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.703665638917438e-06, |
|
"loss": 1.0863, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.701524494689963e-06, |
|
"loss": 1.0372, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.699383350462488e-06, |
|
"loss": 1.0741, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.697242206235012e-06, |
|
"loss": 1.0988, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.695101062007537e-06, |
|
"loss": 1.0808, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.692959917780063e-06, |
|
"loss": 1.0717, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.690818773552586e-06, |
|
"loss": 1.0632, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.688677629325112e-06, |
|
"loss": 1.0539, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.686536485097637e-06, |
|
"loss": 1.0944, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.684395340870162e-06, |
|
"loss": 1.0682, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.682254196642686e-06, |
|
"loss": 1.066, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.680113052415212e-06, |
|
"loss": 1.0649, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_multi_news_accuracy": 0.5626207290657621, |
|
"eval_multi_news_loss": 1.90234375, |
|
"eval_multi_news_runtime": 374.9582, |
|
"eval_multi_news_samples_per_second": 14.994, |
|
"eval_multi_news_steps_per_second": 1.875, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_samsum_accuracy": 0.6482050796954074, |
|
"eval_samsum_loss": 1.244140625, |
|
"eval_samsum_runtime": 36.4554, |
|
"eval_samsum_samples_per_second": 22.438, |
|
"eval_samsum_steps_per_second": 2.825, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_billsum_accuracy": 0.6542856069509964, |
|
"eval_billsum_loss": 1.427734375, |
|
"eval_billsum_runtime": 204.9118, |
|
"eval_billsum_samples_per_second": 15.953, |
|
"eval_billsum_steps_per_second": 1.996, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_wmt2019_zh-en_accuracy": 0.5960585499733171, |
|
"eval_wmt2019_zh-en_loss": 1.8671875, |
|
"eval_wmt2019_zh-en_runtime": 42.5542, |
|
"eval_wmt2019_zh-en_samples_per_second": 23.499, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.937, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_ted_trans_en-ja_accuracy": 0.5799230113905279, |
|
"eval_ted_trans_en-ja_loss": 1.7705078125, |
|
"eval_ted_trans_en-ja_runtime": 35.599, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.501, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.837, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_ted_trans_zh-ja_accuracy": 0.48124428179322964, |
|
"eval_ted_trans_zh-ja_loss": 2.44140625, |
|
"eval_ted_trans_zh-ja_runtime": 2.5311, |
|
"eval_ted_trans_zh-ja_samples_per_second": 16.594, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.371, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_sharegpt_accuracy": 0.7523055854464493, |
|
"eval_sharegpt_loss": 0.92236328125, |
|
"eval_sharegpt_runtime": 732.6588, |
|
"eval_sharegpt_samples_per_second": 4.57, |
|
"eval_sharegpt_steps_per_second": 0.572, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_dolly15k_accuracy": 0.5933216831140351, |
|
"eval_dolly15k_loss": 1.65625, |
|
"eval_dolly15k_runtime": 33.8299, |
|
"eval_dolly15k_samples_per_second": 22.199, |
|
"eval_dolly15k_steps_per_second": 2.779, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_ikala_accuracy": 0.7489160065784373, |
|
"eval_ikala_loss": 0.9208984375, |
|
"eval_ikala_runtime": 886.4258, |
|
"eval_ikala_samples_per_second": 16.026, |
|
"eval_ikala_steps_per_second": 2.004, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_oasst_export_accuracy": 0.6617497330814418, |
|
"eval_oasst_export_loss": 1.572265625, |
|
"eval_oasst_export_runtime": 135.1792, |
|
"eval_oasst_export_samples_per_second": 15.528, |
|
"eval_oasst_export_steps_per_second": 1.946, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_joke_accuracy": 0.5256823351023503, |
|
"eval_joke_loss": 2.013671875, |
|
"eval_joke_runtime": 3.6235, |
|
"eval_joke_samples_per_second": 20.974, |
|
"eval_joke_steps_per_second": 2.76, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_gsm8k_accuracy": 0.7892603458925262, |
|
"eval_gsm8k_loss": 0.77001953125, |
|
"eval_gsm8k_runtime": 56.8179, |
|
"eval_gsm8k_samples_per_second": 23.215, |
|
"eval_gsm8k_steps_per_second": 2.904, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_webgpt_accuracy": 0.49841011281811054, |
|
"eval_webgpt_loss": 2.19140625, |
|
"eval_webgpt_runtime": 157.0655, |
|
"eval_webgpt_samples_per_second": 22.685, |
|
"eval_webgpt_steps_per_second": 2.84, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.677971908187737e-06, |
|
"loss": 1.0692, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.67583076396026e-06, |
|
"loss": 1.1059, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.673689619732786e-06, |
|
"loss": 1.0758, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.671548475505311e-06, |
|
"loss": 1.0386, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.669407331277835e-06, |
|
"loss": 1.0865, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.66726618705036e-06, |
|
"loss": 1.0537, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.665125042822884e-06, |
|
"loss": 1.0481, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.662983898595411e-06, |
|
"loss": 1.0811, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.660842754367935e-06, |
|
"loss": 1.0518, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.65870161014046e-06, |
|
"loss": 1.0756, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.656560465912986e-06, |
|
"loss": 1.0594, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.65441932168551e-06, |
|
"loss": 1.0842, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.652278177458035e-06, |
|
"loss": 1.0703, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.650137033230558e-06, |
|
"loss": 1.0649, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.647995889003084e-06, |
|
"loss": 1.0869, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.645854744775609e-06, |
|
"loss": 1.0494, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.643713600548134e-06, |
|
"loss": 1.0575, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.641572456320658e-06, |
|
"loss": 1.0846, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.639431312093183e-06, |
|
"loss": 1.0815, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.637290167865709e-06, |
|
"loss": 1.0593, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.635149023638232e-06, |
|
"loss": 1.0936, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.633007879410758e-06, |
|
"loss": 1.0249, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.630866735183283e-06, |
|
"loss": 1.0382, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.628725590955807e-06, |
|
"loss": 1.0528, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.626584446728332e-06, |
|
"loss": 1.0469, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.624443302500858e-06, |
|
"loss": 1.053, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.622302158273383e-06, |
|
"loss": 1.0301, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.620161014045907e-06, |
|
"loss": 1.0913, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.618019869818432e-06, |
|
"loss": 1.0633, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.615878725590957e-06, |
|
"loss": 1.0743, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.613737581363481e-06, |
|
"loss": 1.0486, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.611596437136006e-06, |
|
"loss": 1.0491, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.609455292908532e-06, |
|
"loss": 1.0736, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.607314148681056e-06, |
|
"loss": 1.0729, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.605173004453581e-06, |
|
"loss": 1.0625, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.603031860226105e-06, |
|
"loss": 1.0726, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.600890715998632e-06, |
|
"loss": 1.0666, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.598749571771155e-06, |
|
"loss": 1.0773, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.59660842754368e-06, |
|
"loss": 1.065, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.594467283316204e-06, |
|
"loss": 1.0404, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.59232613908873e-06, |
|
"loss": 1.0717, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.590184994861255e-06, |
|
"loss": 1.0667, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.588043850633779e-06, |
|
"loss": 1.0603, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.585902706406304e-06, |
|
"loss": 1.0452, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.58376156217883e-06, |
|
"loss": 1.0681, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.581620417951353e-06, |
|
"loss": 1.075, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.579479273723879e-06, |
|
"loss": 1.0735, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.577338129496404e-06, |
|
"loss": 1.0859, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.57519698526893e-06, |
|
"loss": 1.0498, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.573055841041453e-06, |
|
"loss": 1.0353, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_multi_news_accuracy": 0.5627512384133357, |
|
"eval_multi_news_loss": 1.9013671875, |
|
"eval_multi_news_runtime": 374.2642, |
|
"eval_multi_news_samples_per_second": 15.021, |
|
"eval_multi_news_steps_per_second": 1.878, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_samsum_accuracy": 0.6499550678711631, |
|
"eval_samsum_loss": 1.228515625, |
|
"eval_samsum_runtime": 37.472, |
|
"eval_samsum_samples_per_second": 21.83, |
|
"eval_samsum_steps_per_second": 2.749, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_billsum_accuracy": 0.6559680786548813, |
|
"eval_billsum_loss": 1.4189453125, |
|
"eval_billsum_runtime": 204.7196, |
|
"eval_billsum_samples_per_second": 15.968, |
|
"eval_billsum_steps_per_second": 1.998, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_wmt2019_zh-en_accuracy": 0.605999539382773, |
|
"eval_wmt2019_zh-en_loss": 1.8330078125, |
|
"eval_wmt2019_zh-en_runtime": 43.2969, |
|
"eval_wmt2019_zh-en_samples_per_second": 23.096, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.887, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_ted_trans_en-ja_accuracy": 0.591362074351765, |
|
"eval_ted_trans_en-ja_loss": 1.7236328125, |
|
"eval_ted_trans_en-ja_runtime": 36.1634, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.149, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.793, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_ted_trans_zh-ja_accuracy": 0.4934623430962343, |
|
"eval_ted_trans_zh-ja_loss": 2.33984375, |
|
"eval_ted_trans_zh-ja_runtime": 2.8371, |
|
"eval_ted_trans_zh-ja_samples_per_second": 14.804, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.115, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_sharegpt_accuracy": 0.7566927466258041, |
|
"eval_sharegpt_loss": 0.90234375, |
|
"eval_sharegpt_runtime": 732.9729, |
|
"eval_sharegpt_samples_per_second": 4.568, |
|
"eval_sharegpt_steps_per_second": 0.572, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_dolly15k_accuracy": 0.5929618969298246, |
|
"eval_dolly15k_loss": 1.65625, |
|
"eval_dolly15k_runtime": 33.6288, |
|
"eval_dolly15k_samples_per_second": 22.332, |
|
"eval_dolly15k_steps_per_second": 2.795, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_ikala_accuracy": 0.7508814779446873, |
|
"eval_ikala_loss": 0.91015625, |
|
"eval_ikala_runtime": 887.5958, |
|
"eval_ikala_samples_per_second": 16.005, |
|
"eval_ikala_steps_per_second": 2.001, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_oasst_export_accuracy": 0.6615836827915093, |
|
"eval_oasst_export_loss": 1.57421875, |
|
"eval_oasst_export_runtime": 134.7449, |
|
"eval_oasst_export_samples_per_second": 15.578, |
|
"eval_oasst_export_steps_per_second": 1.952, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_joke_accuracy": 0.535538286580743, |
|
"eval_joke_loss": 1.9736328125, |
|
"eval_joke_runtime": 3.6334, |
|
"eval_joke_samples_per_second": 20.917, |
|
"eval_joke_steps_per_second": 2.752, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_gsm8k_accuracy": 0.7943020382952439, |
|
"eval_gsm8k_loss": 0.74560546875, |
|
"eval_gsm8k_runtime": 57.4917, |
|
"eval_gsm8k_samples_per_second": 22.942, |
|
"eval_gsm8k_steps_per_second": 2.87, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_webgpt_accuracy": 0.49873483307959016, |
|
"eval_webgpt_loss": 2.19140625, |
|
"eval_webgpt_runtime": 155.9655, |
|
"eval_webgpt_samples_per_second": 22.845, |
|
"eval_webgpt_steps_per_second": 2.86, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.570914696813978e-06, |
|
"loss": 1.0657, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.568773552586504e-06, |
|
"loss": 1.0743, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.566632408359027e-06, |
|
"loss": 1.0543, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.564491264131553e-06, |
|
"loss": 1.0457, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.562350119904078e-06, |
|
"loss": 1.0546, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.560208975676602e-06, |
|
"loss": 1.0485, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.558067831449127e-06, |
|
"loss": 1.0535, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.555926687221651e-06, |
|
"loss": 1.0603, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.553785542994178e-06, |
|
"loss": 1.0444, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.551644398766702e-06, |
|
"loss": 1.0482, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.549503254539227e-06, |
|
"loss": 1.0509, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.54736211031175e-06, |
|
"loss": 1.036, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.545220966084276e-06, |
|
"loss": 1.0457, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.543079821856801e-06, |
|
"loss": 1.065, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.540938677629325e-06, |
|
"loss": 1.0441, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.53879753340185e-06, |
|
"loss": 1.047, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.536656389174376e-06, |
|
"loss": 1.05, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.5345152449469e-06, |
|
"loss": 1.0615, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.532374100719425e-06, |
|
"loss": 1.0575, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.53023295649195e-06, |
|
"loss": 1.0614, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.528091812264476e-06, |
|
"loss": 1.0504, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.525950668037e-06, |
|
"loss": 1.0401, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.524023638232272e-06, |
|
"loss": 1.0376, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.521882494004797e-06, |
|
"loss": 1.0265, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.519741349777321e-06, |
|
"loss": 1.0636, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.517600205549846e-06, |
|
"loss": 1.059, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.515459061322372e-06, |
|
"loss": 1.0552, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.513317917094897e-06, |
|
"loss": 1.0577, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.51117677286742e-06, |
|
"loss": 1.034, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.509035628639946e-06, |
|
"loss": 1.0697, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.506894484412471e-06, |
|
"loss": 1.0392, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.504753340184995e-06, |
|
"loss": 1.0069, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.50261219595752e-06, |
|
"loss": 1.0583, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.500471051730046e-06, |
|
"loss": 1.0522, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.49832990750257e-06, |
|
"loss": 1.0315, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.496188763275095e-06, |
|
"loss": 1.057, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.494047619047619e-06, |
|
"loss": 1.0513, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.491906474820146e-06, |
|
"loss": 1.0342, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.48976533059267e-06, |
|
"loss": 1.0559, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.487624186365195e-06, |
|
"loss": 1.0377, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.485483042137718e-06, |
|
"loss": 1.0512, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.483341897910244e-06, |
|
"loss": 1.0439, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.48120075368277e-06, |
|
"loss": 1.0344, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.479059609455293e-06, |
|
"loss": 1.0343, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.477132579650567e-06, |
|
"loss": 1.0463, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.47499143542309e-06, |
|
"loss": 1.0443, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.472850291195616e-06, |
|
"loss": 1.0559, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.47070914696814e-06, |
|
"loss": 1.0555, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.468568002740665e-06, |
|
"loss": 1.0267, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.46642685851319e-06, |
|
"loss": 1.042, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_multi_news_accuracy": 0.5627252632519255, |
|
"eval_multi_news_loss": 1.9013671875, |
|
"eval_multi_news_runtime": 374.4153, |
|
"eval_multi_news_samples_per_second": 15.015, |
|
"eval_multi_news_steps_per_second": 1.878, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_samsum_accuracy": 0.6482050796954074, |
|
"eval_samsum_loss": 1.2255859375, |
|
"eval_samsum_runtime": 38.0285, |
|
"eval_samsum_samples_per_second": 21.51, |
|
"eval_samsum_steps_per_second": 2.708, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_billsum_accuracy": 0.6552127105772998, |
|
"eval_billsum_loss": 1.4169921875, |
|
"eval_billsum_runtime": 204.5119, |
|
"eval_billsum_samples_per_second": 15.984, |
|
"eval_billsum_steps_per_second": 2.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_wmt2019_zh-en_accuracy": 0.5987996483045988, |
|
"eval_wmt2019_zh-en_loss": 1.8505859375, |
|
"eval_wmt2019_zh-en_runtime": 43.4755, |
|
"eval_wmt2019_zh-en_samples_per_second": 23.001, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.875, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_ted_trans_en-ja_accuracy": 0.5948422811429342, |
|
"eval_ted_trans_en-ja_loss": 1.69140625, |
|
"eval_ted_trans_en-ja_runtime": 36.0142, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.241, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.804, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_ted_trans_zh-ja_accuracy": 0.521213679609154, |
|
"eval_ted_trans_zh-ja_loss": 2.265625, |
|
"eval_ted_trans_zh-ja_runtime": 2.289, |
|
"eval_ted_trans_zh-ja_samples_per_second": 18.349, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.621, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_sharegpt_accuracy": 0.7602375296761273, |
|
"eval_sharegpt_loss": 0.884765625, |
|
"eval_sharegpt_runtime": 733.0075, |
|
"eval_sharegpt_samples_per_second": 4.567, |
|
"eval_sharegpt_steps_per_second": 0.572, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_dolly15k_accuracy": 0.5921566611842105, |
|
"eval_dolly15k_loss": 1.65234375, |
|
"eval_dolly15k_runtime": 33.7747, |
|
"eval_dolly15k_samples_per_second": 22.236, |
|
"eval_dolly15k_steps_per_second": 2.783, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_ikala_accuracy": 0.7515879865982705, |
|
"eval_ikala_loss": 0.90576171875, |
|
"eval_ikala_runtime": 884.7883, |
|
"eval_ikala_samples_per_second": 16.056, |
|
"eval_ikala_steps_per_second": 2.007, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_oasst_export_accuracy": 0.6615137668799588, |
|
"eval_oasst_export_loss": 1.5693359375, |
|
"eval_oasst_export_runtime": 134.5394, |
|
"eval_oasst_export_samples_per_second": 15.601, |
|
"eval_oasst_export_steps_per_second": 1.955, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_joke_accuracy": 0.5379075056861259, |
|
"eval_joke_loss": 1.966796875, |
|
"eval_joke_runtime": 4.5957, |
|
"eval_joke_samples_per_second": 16.537, |
|
"eval_joke_steps_per_second": 2.176, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_gsm8k_accuracy": 0.7959928968499074, |
|
"eval_gsm8k_loss": 0.74072265625, |
|
"eval_gsm8k_runtime": 57.0884, |
|
"eval_gsm8k_samples_per_second": 23.105, |
|
"eval_gsm8k_steps_per_second": 2.89, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_webgpt_accuracy": 0.4989797040964437, |
|
"eval_webgpt_loss": 2.19140625, |
|
"eval_webgpt_runtime": 157.3673, |
|
"eval_webgpt_samples_per_second": 22.641, |
|
"eval_webgpt_steps_per_second": 2.834, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.464285714285714e-06, |
|
"loss": 1.0577, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.46214457005824e-06, |
|
"loss": 1.0465, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.460003425830765e-06, |
|
"loss": 1.0635, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.457862281603289e-06, |
|
"loss": 1.035, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.455721137375814e-06, |
|
"loss": 1.0623, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.45357999314834e-06, |
|
"loss": 1.0279, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.451438848920865e-06, |
|
"loss": 1.0287, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.449297704693388e-06, |
|
"loss": 1.0567, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.447156560465914e-06, |
|
"loss": 1.0246, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.445015416238439e-06, |
|
"loss": 1.0352, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.442874272010963e-06, |
|
"loss": 1.0493, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.440733127783488e-06, |
|
"loss": 1.0435, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.438591983556014e-06, |
|
"loss": 1.0418, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.436450839328539e-06, |
|
"loss": 1.0586, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.434309695101063e-06, |
|
"loss": 1.0041, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.432168550873588e-06, |
|
"loss": 1.0236, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.430027406646113e-06, |
|
"loss": 1.0377, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.427886262418637e-06, |
|
"loss": 1.0385, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.425745118191162e-06, |
|
"loss": 1.0418, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.423603973963686e-06, |
|
"loss": 1.0304, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.421462829736211e-06, |
|
"loss": 1.0376, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.419321685508737e-06, |
|
"loss": 1.0377, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.41718054128126e-06, |
|
"loss": 1.0451, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.415039397053788e-06, |
|
"loss": 1.0359, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.412898252826311e-06, |
|
"loss": 1.0375, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.410757108598837e-06, |
|
"loss": 1.0575, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.40861596437136e-06, |
|
"loss": 1.042, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.406474820143886e-06, |
|
"loss": 1.0405, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.404333675916411e-06, |
|
"loss": 1.0538, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.402192531688935e-06, |
|
"loss": 1.0168, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.40005138746146e-06, |
|
"loss": 1.0406, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.397910243233985e-06, |
|
"loss": 1.0419, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.39576909900651e-06, |
|
"loss": 1.0249, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.393627954779035e-06, |
|
"loss": 1.0455, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.39148681055156e-06, |
|
"loss": 1.0314, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.389345666324085e-06, |
|
"loss": 1.0365, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.387204522096609e-06, |
|
"loss": 1.0503, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.385063377869134e-06, |
|
"loss": 1.0134, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.38292223364166e-06, |
|
"loss": 1.0655, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.380781089414183e-06, |
|
"loss": 1.0403, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.378639945186709e-06, |
|
"loss": 1.042, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.376498800959234e-06, |
|
"loss": 1.0564, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.374357656731758e-06, |
|
"loss": 1.0469, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.372216512504283e-06, |
|
"loss": 1.0323, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.370075368276807e-06, |
|
"loss": 1.0434, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.367934224049334e-06, |
|
"loss": 1.0474, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.365793079821858e-06, |
|
"loss": 1.0576, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.363651935594383e-06, |
|
"loss": 1.0588, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.361510791366907e-06, |
|
"loss": 1.0369, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.359369647139432e-06, |
|
"loss": 1.0159, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_multi_news_accuracy": 0.5636508659548628, |
|
"eval_multi_news_loss": 1.9013671875, |
|
"eval_multi_news_runtime": 374.2666, |
|
"eval_multi_news_samples_per_second": 15.021, |
|
"eval_multi_news_steps_per_second": 1.878, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_samsum_accuracy": 0.6542117958662441, |
|
"eval_samsum_loss": 1.22265625, |
|
"eval_samsum_runtime": 37.6015, |
|
"eval_samsum_samples_per_second": 21.754, |
|
"eval_samsum_steps_per_second": 2.739, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_billsum_accuracy": 0.6560928552644996, |
|
"eval_billsum_loss": 1.412109375, |
|
"eval_billsum_runtime": 204.697, |
|
"eval_billsum_samples_per_second": 15.97, |
|
"eval_billsum_steps_per_second": 1.998, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_wmt2019_zh-en_accuracy": 0.6045209655463313, |
|
"eval_wmt2019_zh-en_loss": 1.826171875, |
|
"eval_wmt2019_zh-en_runtime": 43.7238, |
|
"eval_wmt2019_zh-en_samples_per_second": 22.871, |
|
"eval_wmt2019_zh-en_steps_per_second": 2.859, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_ted_trans_en-ja_accuracy": 0.5986221480612599, |
|
"eval_ted_trans_en-ja_loss": 1.6884765625, |
|
"eval_ted_trans_en-ja_runtime": 35.5277, |
|
"eval_ted_trans_en-ja_samples_per_second": 22.546, |
|
"eval_ted_trans_en-ja_steps_per_second": 2.843, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_ted_trans_zh-ja_accuracy": 0.49819293855991104, |
|
"eval_ted_trans_zh-ja_loss": 2.29296875, |
|
"eval_ted_trans_zh-ja_runtime": 2.6448, |
|
"eval_ted_trans_zh-ja_samples_per_second": 15.88, |
|
"eval_ted_trans_zh-ja_steps_per_second": 2.269, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_sharegpt_accuracy": 0.7644941905120852, |
|
"eval_sharegpt_loss": 0.86474609375, |
|
"eval_sharegpt_runtime": 732.8989, |
|
"eval_sharegpt_samples_per_second": 4.568, |
|
"eval_sharegpt_steps_per_second": 0.572, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_dolly15k_accuracy": 0.5935786732456141, |
|
"eval_dolly15k_loss": 1.6474609375, |
|
"eval_dolly15k_runtime": 33.5372, |
|
"eval_dolly15k_samples_per_second": 22.393, |
|
"eval_dolly15k_steps_per_second": 2.803, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_ikala_accuracy": 0.7535167297168253, |
|
"eval_ikala_loss": 0.89990234375, |
|
"eval_ikala_runtime": 887.8229, |
|
"eval_ikala_samples_per_second": 16.001, |
|
"eval_ikala_steps_per_second": 2.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_oasst_export_accuracy": 0.6621677719692545, |
|
"eval_oasst_export_loss": 1.5673828125, |
|
"eval_oasst_export_runtime": 134.0182, |
|
"eval_oasst_export_samples_per_second": 15.662, |
|
"eval_oasst_export_steps_per_second": 1.962, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_joke_accuracy": 0.5397081122062168, |
|
"eval_joke_loss": 1.966796875, |
|
"eval_joke_runtime": 4.7628, |
|
"eval_joke_samples_per_second": 15.957, |
|
"eval_joke_steps_per_second": 2.1, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_gsm8k_accuracy": 0.7978304508956146, |
|
"eval_gsm8k_loss": 0.73388671875, |
|
"eval_gsm8k_runtime": 58.2683, |
|
"eval_gsm8k_samples_per_second": 22.637, |
|
"eval_gsm8k_steps_per_second": 2.832, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_webgpt_accuracy": 0.4992032819813969, |
|
"eval_webgpt_loss": 2.189453125, |
|
"eval_webgpt_runtime": 158.2652, |
|
"eval_webgpt_samples_per_second": 22.513, |
|
"eval_webgpt_steps_per_second": 2.818, |
|
"step": 5000 |
|
} |
|
], |
|
"max_steps": 48704, |
|
"num_train_epochs": 8, |
|
"total_flos": 3017973738504192.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|