|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 2000, |
|
"global_step": 60928, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.967174369747899e-05, |
|
"loss": 3.2651, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.934348739495799e-05, |
|
"loss": 1.7248, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.901523109243697e-05, |
|
"loss": 1.3764, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.868697478991597e-05, |
|
"loss": 1.1757, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_e": 0.581678956324447, |
|
"eval_f1": 0.5311118764265024, |
|
"eval_loss": 1.6264814138412476, |
|
"eval_runtime": 40.5361, |
|
"eval_samples_per_second": 86.984, |
|
"eval_steps_per_second": 0.691, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.835871848739496e-05, |
|
"loss": 1.0579, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8030462184873956e-05, |
|
"loss": 0.9707, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7702205882352946e-05, |
|
"loss": 0.9118, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.7373949579831936e-05, |
|
"loss": 0.8875, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_e": 0.6534316505955757, |
|
"eval_f1": 0.596454998267645, |
|
"eval_loss": 1.359203577041626, |
|
"eval_runtime": 41.4011, |
|
"eval_samples_per_second": 85.167, |
|
"eval_steps_per_second": 0.676, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.7045693277310926e-05, |
|
"loss": 0.8308, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6717436974789916e-05, |
|
"loss": 0.8191, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.638918067226891e-05, |
|
"loss": 0.8039, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.6060924369747897e-05, |
|
"loss": 0.7682, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_e": 0.6781055019852524, |
|
"eval_f1": 0.6213789300225637, |
|
"eval_loss": 1.1949362754821777, |
|
"eval_runtime": 40.0845, |
|
"eval_samples_per_second": 87.964, |
|
"eval_steps_per_second": 0.699, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.5732668067226893e-05, |
|
"loss": 0.7669, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.5404411764705883e-05, |
|
"loss": 0.7246, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.507615546218488e-05, |
|
"loss": 0.723, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.474789915966387e-05, |
|
"loss": 0.7023, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_e": 0.7073170731707317, |
|
"eval_f1": 0.6500612956253415, |
|
"eval_loss": 1.1214724779129028, |
|
"eval_runtime": 36.2407, |
|
"eval_samples_per_second": 97.294, |
|
"eval_steps_per_second": 0.773, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.4419642857142854e-05, |
|
"loss": 0.6801, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.409138655462185e-05, |
|
"loss": 0.6614, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.376313025210084e-05, |
|
"loss": 0.6895, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.343487394957984e-05, |
|
"loss": 0.6633, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_e": 0.707884288145207, |
|
"eval_f1": 0.6365888205833479, |
|
"eval_loss": 1.1668164730072021, |
|
"eval_runtime": 36.2099, |
|
"eval_samples_per_second": 97.377, |
|
"eval_steps_per_second": 0.773, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.310661764705883e-05, |
|
"loss": 0.6346, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.277836134453782e-05, |
|
"loss": 0.6376, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.245010504201681e-05, |
|
"loss": 0.654, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.21218487394958e-05, |
|
"loss": 0.6169, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_e": 0.7271695972773681, |
|
"eval_f1": 0.6654857811733846, |
|
"eval_loss": 1.0702860355377197, |
|
"eval_runtime": 36.2662, |
|
"eval_samples_per_second": 97.226, |
|
"eval_steps_per_second": 0.772, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.1793592436974794e-05, |
|
"loss": 0.6161, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.1465336134453784e-05, |
|
"loss": 0.6157, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.1137079831932774e-05, |
|
"loss": 0.618, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.0808823529411765e-05, |
|
"loss": 0.604, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_e": 0.7399319342030629, |
|
"eval_f1": 0.6704723459995169, |
|
"eval_loss": 0.9919618964195251, |
|
"eval_runtime": 36.2385, |
|
"eval_samples_per_second": 97.3, |
|
"eval_steps_per_second": 0.773, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.048056722689076e-05, |
|
"loss": 0.6043, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.015231092436975e-05, |
|
"loss": 0.579, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.982405462184874e-05, |
|
"loss": 0.5503, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.949579831932773e-05, |
|
"loss": 0.5093, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_e": 0.7359614293817357, |
|
"eval_f1": 0.6659783311586052, |
|
"eval_loss": 0.9624159336090088, |
|
"eval_runtime": 36.2264, |
|
"eval_samples_per_second": 97.332, |
|
"eval_steps_per_second": 0.773, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.916754201680672e-05, |
|
"loss": 0.5139, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.883928571428572e-05, |
|
"loss": 0.5018, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.851102941176471e-05, |
|
"loss": 0.5009, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.81827731092437e-05, |
|
"loss": 0.5068, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_e": 0.7464549064095292, |
|
"eval_f1": 0.6771850837420853, |
|
"eval_loss": 1.0379488468170166, |
|
"eval_runtime": 36.2397, |
|
"eval_samples_per_second": 97.297, |
|
"eval_steps_per_second": 0.773, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.785451680672269e-05, |
|
"loss": 0.4928, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.7526260504201685e-05, |
|
"loss": 0.5026, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.7198004201680675e-05, |
|
"loss": 0.4938, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.6869747899159665e-05, |
|
"loss": 0.4812, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_e": 0.742484401588202, |
|
"eval_f1": 0.6734074837554972, |
|
"eval_loss": 1.0004695653915405, |
|
"eval_runtime": 36.2259, |
|
"eval_samples_per_second": 97.334, |
|
"eval_steps_per_second": 0.773, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.6541491596638656e-05, |
|
"loss": 0.4877, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.6213235294117646e-05, |
|
"loss": 0.4823, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.588497899159664e-05, |
|
"loss": 0.4708, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.555672268907563e-05, |
|
"loss": 0.4831, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_e": 0.7433352240499149, |
|
"eval_f1": 0.6745692090035141, |
|
"eval_loss": 1.0472208261489868, |
|
"eval_runtime": 36.2204, |
|
"eval_samples_per_second": 97.348, |
|
"eval_steps_per_second": 0.773, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.522846638655463e-05, |
|
"loss": 0.4716, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.490021008403361e-05, |
|
"loss": 0.4716, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.45719537815126e-05, |
|
"loss": 0.474, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.42436974789916e-05, |
|
"loss": 0.4748, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_e": 0.7552467385138968, |
|
"eval_f1": 0.6830863599993048, |
|
"eval_loss": 0.9445247650146484, |
|
"eval_runtime": 41.2911, |
|
"eval_samples_per_second": 85.394, |
|
"eval_steps_per_second": 0.678, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.391544117647059e-05, |
|
"loss": 0.4668, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.358718487394958e-05, |
|
"loss": 0.467, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.325892857142857e-05, |
|
"loss": 0.4517, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.2930672268907566e-05, |
|
"loss": 0.4581, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_e": 0.7552467385138968, |
|
"eval_f1": 0.6822537839548614, |
|
"eval_loss": 0.9966788291931152, |
|
"eval_runtime": 36.282, |
|
"eval_samples_per_second": 97.183, |
|
"eval_steps_per_second": 0.772, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.2602415966386556e-05, |
|
"loss": 0.458, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.2274159663865547e-05, |
|
"loss": 0.441, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.1945903361344537e-05, |
|
"loss": 0.4703, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.161764705882353e-05, |
|
"loss": 0.4438, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_e": 0.7487237663074305, |
|
"eval_f1": 0.6789318734426844, |
|
"eval_loss": 0.9515223503112793, |
|
"eval_runtime": 39.9941, |
|
"eval_samples_per_second": 88.163, |
|
"eval_steps_per_second": 0.7, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.1289390756302523e-05, |
|
"loss": 0.4556, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.0961134453781514e-05, |
|
"loss": 0.4453, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.063287815126051e-05, |
|
"loss": 0.4415, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.0304621848739494e-05, |
|
"loss": 0.4417, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_e": 0.7577992058990357, |
|
"eval_f1": 0.686996111301651, |
|
"eval_loss": 0.9782966375350952, |
|
"eval_runtime": 36.255, |
|
"eval_samples_per_second": 97.256, |
|
"eval_steps_per_second": 0.772, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.9976365546218487e-05, |
|
"loss": 0.4254, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.964810924369748e-05, |
|
"loss": 0.3574, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.9319852941176474e-05, |
|
"loss": 0.3596, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.8991596638655467e-05, |
|
"loss": 0.3692, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_e": 0.7524106636415201, |
|
"eval_f1": 0.6861686413926235, |
|
"eval_loss": 1.0621048212051392, |
|
"eval_runtime": 36.2047, |
|
"eval_samples_per_second": 97.391, |
|
"eval_steps_per_second": 0.773, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.8663340336134454e-05, |
|
"loss": 0.3673, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.8335084033613447e-05, |
|
"loss": 0.3683, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.8006827731092438e-05, |
|
"loss": 0.3538, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.767857142857143e-05, |
|
"loss": 0.3555, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_e": 0.7620533182076007, |
|
"eval_f1": 0.6877485506353153, |
|
"eval_loss": 1.040844202041626, |
|
"eval_runtime": 36.2597, |
|
"eval_samples_per_second": 97.243, |
|
"eval_steps_per_second": 0.772, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.7350315126050424e-05, |
|
"loss": 0.3718, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.702205882352941e-05, |
|
"loss": 0.3557, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.6693802521008405e-05, |
|
"loss": 0.3459, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.6365546218487398e-05, |
|
"loss": 0.3657, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_e": 0.7597844583096994, |
|
"eval_f1": 0.6853469432427786, |
|
"eval_loss": 1.1275439262390137, |
|
"eval_runtime": 40.6163, |
|
"eval_samples_per_second": 86.813, |
|
"eval_steps_per_second": 0.689, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.6037289915966388e-05, |
|
"loss": 0.3593, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.5709033613445378e-05, |
|
"loss": 0.3468, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.5380777310924368e-05, |
|
"loss": 0.3475, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.505252100840336e-05, |
|
"loss": 0.3504, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_e": 0.7603516732841747, |
|
"eval_f1": 0.6846985297135735, |
|
"eval_loss": 1.0078964233398438, |
|
"eval_runtime": 36.2227, |
|
"eval_samples_per_second": 97.342, |
|
"eval_steps_per_second": 0.773, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.4724264705882355e-05, |
|
"loss": 0.3574, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.4396008403361345e-05, |
|
"loss": 0.3543, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.406775210084034e-05, |
|
"loss": 0.3339, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.373949579831933e-05, |
|
"loss": 0.3573, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_e": 0.7589336358479863, |
|
"eval_f1": 0.683545228612745, |
|
"eval_loss": 1.0078063011169434, |
|
"eval_runtime": 36.2573, |
|
"eval_samples_per_second": 97.25, |
|
"eval_steps_per_second": 0.772, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.3411239495798322e-05, |
|
"loss": 0.3583, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.3082983193277312e-05, |
|
"loss": 0.346, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.2754726890756302e-05, |
|
"loss": 0.3468, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2426470588235296e-05, |
|
"loss": 0.3409, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_e": 0.7552467385138968, |
|
"eval_f1": 0.6810380542275204, |
|
"eval_loss": 1.08004629611969, |
|
"eval_runtime": 36.2112, |
|
"eval_samples_per_second": 97.373, |
|
"eval_steps_per_second": 0.773, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.2098214285714286e-05, |
|
"loss": 0.3501, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.176995798319328e-05, |
|
"loss": 0.3361, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.144170168067227e-05, |
|
"loss": 0.3452, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.1113445378151263e-05, |
|
"loss": 0.3602, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_e": 0.7609188882586501, |
|
"eval_f1": 0.6832601552139036, |
|
"eval_loss": 1.031318187713623, |
|
"eval_runtime": 36.2238, |
|
"eval_samples_per_second": 97.339, |
|
"eval_steps_per_second": 0.773, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.0785189075630253e-05, |
|
"loss": 0.3473, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 2.0456932773109243e-05, |
|
"loss": 0.3309, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.0128676470588236e-05, |
|
"loss": 0.3342, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.9800420168067226e-05, |
|
"loss": 0.3, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_e": 0.7577992058990357, |
|
"eval_f1": 0.6814344985190464, |
|
"eval_loss": 1.0859274864196777, |
|
"eval_runtime": 36.2358, |
|
"eval_samples_per_second": 97.307, |
|
"eval_steps_per_second": 0.773, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.947216386554622e-05, |
|
"loss": 0.2727, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.9143907563025213e-05, |
|
"loss": 0.271, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.8815651260504203e-05, |
|
"loss": 0.2777, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.8487394957983196e-05, |
|
"loss": 0.2616, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_e": 0.7623369256948384, |
|
"eval_f1": 0.6829232660093741, |
|
"eval_loss": 1.1106504201889038, |
|
"eval_runtime": 36.1973, |
|
"eval_samples_per_second": 97.41, |
|
"eval_steps_per_second": 0.774, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.8159138655462187e-05, |
|
"loss": 0.2702, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.7830882352941177e-05, |
|
"loss": 0.2665, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.7502626050420167e-05, |
|
"loss": 0.2661, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.717436974789916e-05, |
|
"loss": 0.2728, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_e": 0.7555303460011344, |
|
"eval_f1": 0.680738586407374, |
|
"eval_loss": 1.158908724784851, |
|
"eval_runtime": 36.2449, |
|
"eval_samples_per_second": 97.283, |
|
"eval_steps_per_second": 0.773, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.6846113445378154e-05, |
|
"loss": 0.2753, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.6517857142857144e-05, |
|
"loss": 0.2717, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.6189600840336137e-05, |
|
"loss": 0.2723, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.5861344537815127e-05, |
|
"loss": 0.2738, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_e": 0.7648893930799773, |
|
"eval_f1": 0.6879751943104644, |
|
"eval_loss": 1.1301259994506836, |
|
"eval_runtime": 41.2984, |
|
"eval_samples_per_second": 85.379, |
|
"eval_steps_per_second": 0.678, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.5533088235294117e-05, |
|
"loss": 0.2692, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.5204831932773109e-05, |
|
"loss": 0.2714, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.48765756302521e-05, |
|
"loss": 0.2646, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.4548319327731094e-05, |
|
"loss": 0.2664, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_e": 0.7688598979013046, |
|
"eval_f1": 0.6891421496451101, |
|
"eval_loss": 1.0957111120224, |
|
"eval_runtime": 36.2366, |
|
"eval_samples_per_second": 97.305, |
|
"eval_steps_per_second": 0.773, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.4220063025210084e-05, |
|
"loss": 0.2683, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.3891806722689078e-05, |
|
"loss": 0.2698, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.3563550420168068e-05, |
|
"loss": 0.2709, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.2737, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_e": 0.7631877481565513, |
|
"eval_f1": 0.6905602249087808, |
|
"eval_loss": 1.0759004354476929, |
|
"eval_runtime": 36.2232, |
|
"eval_samples_per_second": 97.341, |
|
"eval_steps_per_second": 0.773, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.2907037815126053e-05, |
|
"loss": 0.2703, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.2578781512605043e-05, |
|
"loss": 0.2609, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.2250525210084033e-05, |
|
"loss": 0.2616, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.1922268907563026e-05, |
|
"loss": 0.2784, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_e": 0.7614861032331254, |
|
"eval_f1": 0.6854242844445689, |
|
"eval_loss": 1.0705878734588623, |
|
"eval_runtime": 36.2079, |
|
"eval_samples_per_second": 97.382, |
|
"eval_steps_per_second": 0.773, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.1594012605042018e-05, |
|
"loss": 0.2701, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.126575630252101e-05, |
|
"loss": 0.2647, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.09375e-05, |
|
"loss": 0.2608, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 1.0609243697478992e-05, |
|
"loss": 0.2622, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_e": 0.7612024957458877, |
|
"eval_f1": 0.6860082711191074, |
|
"eval_loss": 1.1619102954864502, |
|
"eval_runtime": 36.2375, |
|
"eval_samples_per_second": 97.303, |
|
"eval_steps_per_second": 0.773, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.0280987394957983e-05, |
|
"loss": 0.2547, |
|
"step": 60500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 76160, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1.9495781319278917e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|