|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.697350069735007, |
|
"eval_steps": 25, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.222222222222222e-05, |
|
"loss": 2.2776, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00014444444444444444, |
|
"loss": 2.2089, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00021666666666666666, |
|
"loss": 2.2124, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002888888888888889, |
|
"loss": 2.0872, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003611111111111111, |
|
"loss": 1.9608, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004333333333333333, |
|
"loss": 1.9254, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005055555555555555, |
|
"loss": 1.8716, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005777777777777778, |
|
"loss": 1.8424, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00065, |
|
"loss": 1.8139, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0006499909512851264, |
|
"loss": 1.8298, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0006499638056443784, |
|
"loss": 1.735, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0006499185645893443, |
|
"loss": 1.707, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0006498552306392452, |
|
"loss": 1.7144, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0006497738073207941, |
|
"loss": 1.6774, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0006496742991679994, |
|
"loss": 1.6373, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0006495567117219131, |
|
"loss": 1.5874, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0006494210515303213, |
|
"loss": 1.5767, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0006492673261473803, |
|
"loss": 1.3921, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0006490955441331957, |
|
"loss": 1.2696, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0006489057150533456, |
|
"loss": 1.065, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0006486978494783486, |
|
"loss": 0.9967, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0006484719589830741, |
|
"loss": 0.8554, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0006482280561460987, |
|
"loss": 0.8944, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0006479661545490054, |
|
"loss": 0.8563, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000647686268775627, |
|
"loss": 0.9036, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 0.782578706741333, |
|
"eval_runtime": 47.3187, |
|
"eval_samples_per_second": 2.113, |
|
"eval_steps_per_second": 0.528, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0006473884144112352, |
|
"loss": 0.8622, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0006470726080416708, |
|
"loss": 0.8543, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000646738867252422, |
|
"loss": 0.821, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006463872106276441, |
|
"loss": 0.798, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006460176577491251, |
|
"loss": 0.7795, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006456302291951948, |
|
"loss": 0.845, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006452249465395796, |
|
"loss": 0.8199, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0006448018323502008, |
|
"loss": 0.8371, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0006443609101879176, |
|
"loss": 0.8235, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0006439022046052159, |
|
"loss": 0.8063, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0006434257411448404, |
|
"loss": 0.8153, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0006429315463383726, |
|
"loss": 0.8219, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0006424196477047534, |
|
"loss": 0.8167, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0006418900737487508, |
|
"loss": 0.7766, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0006413428539593724, |
|
"loss": 0.8465, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0006407780188082232, |
|
"loss": 0.7966, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0006401955997478094, |
|
"loss": 0.8225, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0006395956292097865, |
|
"loss": 0.7915, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0006389781406031534, |
|
"loss": 0.7923, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0006383431683123921, |
|
"loss": 0.8051, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0006376907476955534, |
|
"loss": 0.8127, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000637020915082287, |
|
"loss": 0.7747, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00063633370777182, |
|
"loss": 0.797, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0006356291640308783, |
|
"loss": 0.743, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000634907323091557, |
|
"loss": 0.8201, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 0.7397407293319702, |
|
"eval_runtime": 47.3134, |
|
"eval_samples_per_second": 2.114, |
|
"eval_steps_per_second": 0.528, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000634168225149135, |
|
"loss": 0.8621, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0006334119113598372, |
|
"loss": 0.8159, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0006326384238385426, |
|
"loss": 0.7929, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006318478056564394, |
|
"loss": 0.8438, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006310401008386257, |
|
"loss": 0.7576, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006302153543616592, |
|
"loss": 0.7918, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006293736121510519, |
|
"loss": 0.8314, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006285149210787133, |
|
"loss": 0.7491, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006276393289603396, |
|
"loss": 0.78, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006267468845527521, |
|
"loss": 0.786, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006258376375511813, |
|
"loss": 0.7788, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006249116385865004, |
|
"loss": 0.7504, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0006239689392224053, |
|
"loss": 0.7997, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0006230095919525438, |
|
"loss": 0.7863, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0006220336501975922, |
|
"loss": 0.7892, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0006210411683022809, |
|
"loss": 0.7872, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0006200322015323679, |
|
"loss": 0.7779, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0006190068060715616, |
|
"loss": 0.8178, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0006179650390183923, |
|
"loss": 0.7521, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0006169069583830324, |
|
"loss": 0.8174, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0006158326230840664, |
|
"loss": 0.7539, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0006147420929452101, |
|
"loss": 0.7906, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0006136354286919789, |
|
"loss": 0.7639, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000612512691948307, |
|
"loss": 0.7884, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0006113739452331156, |
|
"loss": 0.809, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 0.7233907580375671, |
|
"eval_runtime": 47.3612, |
|
"eval_samples_per_second": 2.111, |
|
"eval_steps_per_second": 0.528, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0006102192519568312, |
|
"loss": 0.7317, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0006090486764178553, |
|
"loss": 0.7376, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0006078622837989834, |
|
"loss": 0.7396, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0006066601401637757, |
|
"loss": 0.7247, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0006054423124528785, |
|
"loss": 0.7662, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0006042088684802962, |
|
"loss": 0.7529, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0006029598769296152, |
|
"loss": 0.7951, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0006016954073501798, |
|
"loss": 0.774, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0006004155301532189, |
|
"loss": 0.7052, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0005991203166079252, |
|
"loss": 0.6928, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0005978098388374867, |
|
"loss": 0.7521, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000596484169815071, |
|
"loss": 0.7285, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000595143383359761, |
|
"loss": 0.6915, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.000593787554132445, |
|
"loss": 0.7301, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.000592416757631659, |
|
"loss": 0.7669, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0005910310701893825, |
|
"loss": 0.7103, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0005896305689667885, |
|
"loss": 0.7646, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005882153319499459, |
|
"loss": 0.743, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005867854379454778, |
|
"loss": 0.6911, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005853409665761729, |
|
"loss": 0.7662, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005838819982765514, |
|
"loss": 0.7422, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005824086142883867, |
|
"loss": 0.8344, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000580920896656181, |
|
"loss": 0.7714, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005794189282225969, |
|
"loss": 0.6885, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005779027926238441, |
|
"loss": 0.7355, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 0.7278771996498108, |
|
"eval_runtime": 47.3492, |
|
"eval_samples_per_second": 2.112, |
|
"eval_steps_per_second": 0.528, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005763725742850227, |
|
"loss": 0.7363, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0005748283584154215, |
|
"loss": 0.7931, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0005732702310037735, |
|
"loss": 0.7028, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0005716982788134676, |
|
"loss": 0.7677, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000570112589377717, |
|
"loss": 0.7406, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0005685132509946853, |
|
"loss": 0.7332, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0005669003527225695, |
|
"loss": 0.729, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0005652739843746411, |
|
"loss": 0.7529, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0005636342365142442, |
|
"loss": 0.7445, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005619812004497534, |
|
"loss": 0.7615, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005603149682294887, |
|
"loss": 0.7293, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005586356326365904, |
|
"loss": 0.7309, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005569432871838522, |
|
"loss": 0.7496, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005552380261085136, |
|
"loss": 0.7039, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0005535199443670132, |
|
"loss": 0.7672, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0005517891376297008, |
|
"loss": 0.7628, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0005500457022755095, |
|
"loss": 0.7324, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0005482897353865897, |
|
"loss": 0.7217, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005465213347429026, |
|
"loss": 0.7041, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005447405988167756, |
|
"loss": 0.7194, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000542947626767419, |
|
"loss": 0.7496, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005411425184354042, |
|
"loss": 0.7347, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0005393253743371041, |
|
"loss": 0.7196, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0005374962956590962, |
|
"loss": 0.7344, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0005356553842525278, |
|
"loss": 0.7568, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.7157579064369202, |
|
"eval_runtime": 47.3884, |
|
"eval_samples_per_second": 2.11, |
|
"eval_steps_per_second": 0.528, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0005338027426274445, |
|
"loss": 0.7667, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0005319384739470821, |
|
"loss": 0.7153, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0005300626820221224, |
|
"loss": 0.7016, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0005281754713049115, |
|
"loss": 0.6928, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0005262769468836446, |
|
"loss": 0.7486, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0005243672144765132, |
|
"loss": 0.7203, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0005224463804258194, |
|
"loss": 0.7265, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0005205145516920532, |
|
"loss": 0.6776, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0005185718358479369, |
|
"loss": 0.7298, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0005166183410724353, |
|
"loss": 0.7512, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005146541761447313, |
|
"loss": 0.7544, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005126794504381689, |
|
"loss": 0.7438, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005106942739141625, |
|
"loss": 0.7278, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005086987571160746, |
|
"loss": 0.7458, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005066930111630589, |
|
"loss": 0.8019, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005046771477438739, |
|
"loss": 0.7742, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000502651279110663, |
|
"loss": 0.7462, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005006155180727038, |
|
"loss": 0.7004, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004985699779901268, |
|
"loss": 0.7311, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004965147727676027, |
|
"loss": 0.7191, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004944500168479995, |
|
"loss": 0.7011, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004923758252060099, |
|
"loss": 0.7394, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004902923133417497, |
|
"loss": 0.7402, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00048819959727432503, |
|
"loss": 0.6991, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.000486097793535373, |
|
"loss": 0.721, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.7129714488983154, |
|
"eval_runtime": 47.3388, |
|
"eval_samples_per_second": 2.112, |
|
"eval_steps_per_second": 0.528, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004839870191625722, |
|
"loss": 0.6554, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00048186739169312517, |
|
"loss": 0.6815, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004797390291572145, |
|
"loss": 0.7029, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004776020500714291, |
|
"loss": 0.7292, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004754565734321654, |
|
"loss": 0.6858, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004733027187090013, |
|
"loss": 0.7268, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004711406058380429, |
|
"loss": 0.6895, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00046897035521524673, |
|
"loss": 0.6827, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004667920876897147, |
|
"loss": 0.7203, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004646059245569654, |
|
"loss": 0.7267, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004624119875521793, |
|
"loss": 0.7001, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0004602103988434206, |
|
"loss": 0.6973, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00045800128102483376, |
|
"loss": 0.7663, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0004557847571098175, |
|
"loss": 0.699, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0004535609505241742, |
|
"loss": 0.7609, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00045132998509923763, |
|
"loss": 0.6936, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004490919850649773, |
|
"loss": 0.6748, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004468470750430804, |
|
"loss": 0.6764, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00044459538004001275, |
|
"loss": 0.7278, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.000442337025440058, |
|
"loss": 0.7622, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004400721369983352, |
|
"loss": 0.7125, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004378008408337966, |
|
"loss": 0.7045, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004355232634222044, |
|
"loss": 0.6474, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004332395315890887, |
|
"loss": 0.7072, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004309497725026844, |
|
"loss": 0.71, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 0.7100925445556641, |
|
"eval_runtime": 47.2576, |
|
"eval_samples_per_second": 2.116, |
|
"eval_steps_per_second": 0.529, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004286541136668506, |
|
"loss": 0.7112, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00042635268291397056, |
|
"loss": 0.7192, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00042404560839783294, |
|
"loss": 0.7662, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004217330185864961, |
|
"loss": 0.7295, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00041941504225513447, |
|
"loss": 0.7412, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004170918084788675, |
|
"loss": 0.7122, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004147634466255721, |
|
"loss": 0.7375, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004124300863486793, |
|
"loss": 0.7056, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00041009185757995433, |
|
"loss": 0.7127, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00040774889052226133, |
|
"loss": 0.699, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004054013156423135, |
|
"loss": 0.7063, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00040304926366340746, |
|
"loss": 0.7145, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00040069286555814465, |
|
"loss": 0.7283, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00039833225254113795, |
|
"loss": 0.7117, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000395967556061705, |
|
"loss": 0.6506, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00039359890779654843, |
|
"loss": 0.7239, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.000391226439642424, |
|
"loss": 0.7084, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0003888502837087955, |
|
"loss": 0.6888, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0003864705723104786, |
|
"loss": 0.6919, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00038408743796027284, |
|
"loss": 0.7193, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00038170101336158275, |
|
"loss": 0.6699, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0003793114314010282, |
|
"loss": 0.6661, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00037691882514104546, |
|
"loss": 0.7258, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00037452332781247636, |
|
"loss": 0.678, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00037212507280715056, |
|
"loss": 0.7179, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 0.7194330096244812, |
|
"eval_runtime": 47.2733, |
|
"eval_samples_per_second": 2.115, |
|
"eval_steps_per_second": 0.529, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0003697241936704573, |
|
"loss": 0.6818, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00036732082409390886, |
|
"loss": 0.6697, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00036491509790769616, |
|
"loss": 0.7394, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0003625071490732366, |
|
"loss": 0.6873, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0003600971116757144, |
|
"loss": 0.7043, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0003576851199166141, |
|
"loss": 0.6955, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00035527130810624776, |
|
"loss": 0.7175, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00035285581065627595, |
|
"loss": 0.7508, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.000350438762072223, |
|
"loss": 0.6486, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00034802029694598705, |
|
"loss": 0.6568, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0003456005499483456, |
|
"loss": 0.7239, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00034317965582145686, |
|
"loss": 0.7016, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00034075774937135557, |
|
"loss": 0.687, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00033833496546044746, |
|
"loss": 0.6799, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00033591143899999905, |
|
"loss": 0.6837, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00033348730494262534, |
|
"loss": 0.7378, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0003310626982747749, |
|
"loss": 0.6542, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0003286377540092133, |
|
"loss": 0.6899, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00032621260717750515, |
|
"loss": 0.6803, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0003237873928224948, |
|
"loss": 0.6789, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00032136224599078677, |
|
"loss": 0.7018, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0003189373017252252, |
|
"loss": 0.6739, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0003165126950573747, |
|
"loss": 0.6385, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0003140885610000009, |
|
"loss": 0.6665, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00031166503453955246, |
|
"loss": 0.6987, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 0.7022287249565125, |
|
"eval_runtime": 47.2534, |
|
"eval_samples_per_second": 2.116, |
|
"eval_steps_per_second": 0.529, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00030924225062864435, |
|
"loss": 0.6822, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00030682034417854327, |
|
"loss": 0.6643, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0003043994500516544, |
|
"loss": 0.6735, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0003019797030540131, |
|
"loss": 0.6723, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002995612379277771, |
|
"loss": 0.6638, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.000297144189343724, |
|
"loss": 0.6975, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00029472869189375227, |
|
"loss": 0.7059, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00029231488008338595, |
|
"loss": 0.7657, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002899028883242856, |
|
"loss": 0.707, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00028749285092676344, |
|
"loss": 0.6707, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00028508490209230386, |
|
"loss": 0.7009, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00028267917590609116, |
|
"loss": 0.6248, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002802758063295427, |
|
"loss": 0.6933, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00027787492719284936, |
|
"loss": 0.6744, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002754766721875236, |
|
"loss": 0.6926, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002730811748589547, |
|
"loss": 0.725, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002706885685989718, |
|
"loss": 0.73, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002682989866384173, |
|
"loss": 0.6588, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002659125620397272, |
|
"loss": 0.6382, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002635294276895214, |
|
"loss": 0.6902, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00026114971629120453, |
|
"loss": 0.6632, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000258773560357576, |
|
"loss": 0.6414, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002564010922034516, |
|
"loss": 0.6523, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00025403244393829504, |
|
"loss": 0.7573, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.000251667747458862, |
|
"loss": 0.7295, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.7042359709739685, |
|
"eval_runtime": 47.2541, |
|
"eval_samples_per_second": 2.116, |
|
"eval_steps_per_second": 0.529, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002493071344418553, |
|
"loss": 0.7289, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024695073633659246, |
|
"loss": 0.6323, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024459868435768646, |
|
"loss": 0.6575, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024225110947773856, |
|
"loss": 0.7192, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00023990814242004572, |
|
"loss": 0.6669, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00023756991365132073, |
|
"loss": 0.6991, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.000235236553374428, |
|
"loss": 0.63, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00023290819152113257, |
|
"loss": 0.7387, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00023058495774486555, |
|
"loss": 0.6548, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00022826698141350396, |
|
"loss": 0.6401, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00022595439160216711, |
|
"loss": 0.6642, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002236473170860295, |
|
"loss": 0.7435, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00022134588633314936, |
|
"loss": 0.6788, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00021905022749731565, |
|
"loss": 0.7188, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00021676046841091133, |
|
"loss": 0.6899, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002144767365777955, |
|
"loss": 0.6762, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002121991591662034, |
|
"loss": 0.6592, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00020992786300166473, |
|
"loss": 0.6636, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00020766297455994203, |
|
"loss": 0.6471, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00020540461995998728, |
|
"loss": 0.6782, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00020315292495691971, |
|
"loss": 0.6857, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00020090801493502278, |
|
"loss": 0.6337, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001986700149007624, |
|
"loss": 0.6529, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001964390494758258, |
|
"loss": 0.6749, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00019421524289018252, |
|
"loss": 0.6614, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.704497754573822, |
|
"eval_runtime": 47.2368, |
|
"eval_samples_per_second": 2.117, |
|
"eval_steps_per_second": 0.529, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001919987189751662, |
|
"loss": 0.6318, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018978960115657945, |
|
"loss": 0.6396, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018758801244782072, |
|
"loss": 0.6918, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001853940754430346, |
|
"loss": 0.7145, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018320791231028527, |
|
"loss": 0.7019, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018102964478475318, |
|
"loss": 0.7036, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001788593941619569, |
|
"loss": 0.6391, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017669728129099881, |
|
"loss": 0.6977, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017454342656783464, |
|
"loss": 0.6911, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.000172397949928571, |
|
"loss": 0.731, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017026097084278557, |
|
"loss": 0.675, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016813260830687474, |
|
"loss": 0.6953, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016601298083742793, |
|
"loss": 0.6629, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016390220646462695, |
|
"loss": 0.6865, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016180040272567491, |
|
"loss": 0.6926, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00015970768665825037, |
|
"loss": 0.7088, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00015762417479399005, |
|
"loss": 0.6628, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001555499831520006, |
|
"loss": 0.6601, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00015348522723239726, |
|
"loss": 0.6605, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00015143002200987304, |
|
"loss": 0.6514, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00014938448192729616, |
|
"loss": 0.6739, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00014734872088933713, |
|
"loss": 0.6996, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00014532285225612613, |
|
"loss": 0.6322, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00014330698883694115, |
|
"loss": 0.6382, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00014130124288392538, |
|
"loss": 0.6661, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 0.7015612721443176, |
|
"eval_runtime": 47.2358, |
|
"eval_samples_per_second": 2.117, |
|
"eval_steps_per_second": 0.529, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 430, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 7.322913306456883e+17, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|