|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 120340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.979225527671597e-05, |
|
"loss": 1.2637, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_f1": 0.2863453369025287, |
|
"eval_loss": 1.204175591468811, |
|
"eval_runtime": 51.6606, |
|
"eval_samples_per_second": 125.473, |
|
"eval_steps_per_second": 7.859, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.958451055343195e-05, |
|
"loss": 1.213, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_f1": 0.274772728922474, |
|
"eval_loss": 1.2542401552200317, |
|
"eval_runtime": 46.4286, |
|
"eval_samples_per_second": 139.612, |
|
"eval_steps_per_second": 8.745, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.937676583014792e-05, |
|
"loss": 1.1852, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_f1": 0.31238188693470137, |
|
"eval_loss": 1.1441909074783325, |
|
"eval_runtime": 46.2394, |
|
"eval_samples_per_second": 140.183, |
|
"eval_steps_per_second": 8.78, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9169021106863886e-05, |
|
"loss": 1.1495, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_f1": 0.3303277428471857, |
|
"eval_loss": 1.19601309299469, |
|
"eval_runtime": 46.3042, |
|
"eval_samples_per_second": 139.987, |
|
"eval_steps_per_second": 8.768, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8961276383579855e-05, |
|
"loss": 1.1406, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_f1": 0.33363478480748904, |
|
"eval_loss": 1.2050482034683228, |
|
"eval_runtime": 46.0365, |
|
"eval_samples_per_second": 140.801, |
|
"eval_steps_per_second": 8.819, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.875353166029583e-05, |
|
"loss": 1.1332, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_f1": 0.35687474820142895, |
|
"eval_loss": 1.1201504468917847, |
|
"eval_runtime": 45.6751, |
|
"eval_samples_per_second": 141.915, |
|
"eval_steps_per_second": 8.889, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.85457869370118e-05, |
|
"loss": 1.1007, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_f1": 0.3605293337176752, |
|
"eval_loss": 1.1953203678131104, |
|
"eval_runtime": 45.2576, |
|
"eval_samples_per_second": 143.225, |
|
"eval_steps_per_second": 8.971, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.833804221372778e-05, |
|
"loss": 1.1157, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_f1": 0.3862175225700333, |
|
"eval_loss": 1.1009345054626465, |
|
"eval_runtime": 46.0236, |
|
"eval_samples_per_second": 140.841, |
|
"eval_steps_per_second": 8.822, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8130297490443746e-05, |
|
"loss": 1.1172, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_f1": 0.3819750143486552, |
|
"eval_loss": 1.126935362815857, |
|
"eval_runtime": 46.0628, |
|
"eval_samples_per_second": 140.721, |
|
"eval_steps_per_second": 8.814, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.7922552767159715e-05, |
|
"loss": 1.1041, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_f1": 0.29887014740598117, |
|
"eval_loss": 1.1668146848678589, |
|
"eval_runtime": 46.4347, |
|
"eval_samples_per_second": 139.594, |
|
"eval_steps_per_second": 8.743, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.7714808043875684e-05, |
|
"loss": 1.102, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_f1": 0.418642853322715, |
|
"eval_loss": 1.1066502332687378, |
|
"eval_runtime": 46.0383, |
|
"eval_samples_per_second": 140.796, |
|
"eval_steps_per_second": 8.819, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.750706332059166e-05, |
|
"loss": 1.0878, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_f1": 0.3200899667689199, |
|
"eval_loss": 1.1729530096054077, |
|
"eval_runtime": 46.8484, |
|
"eval_samples_per_second": 138.361, |
|
"eval_steps_per_second": 8.666, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.729931859730763e-05, |
|
"loss": 1.0866, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_f1": 0.38888205294003, |
|
"eval_loss": 1.108739972114563, |
|
"eval_runtime": 46.5428, |
|
"eval_samples_per_second": 139.27, |
|
"eval_steps_per_second": 8.723, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.7091573874023606e-05, |
|
"loss": 1.0729, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_f1": 0.335409883600229, |
|
"eval_loss": 1.1224578619003296, |
|
"eval_runtime": 46.3948, |
|
"eval_samples_per_second": 139.714, |
|
"eval_steps_per_second": 8.751, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6883829150739575e-05, |
|
"loss": 1.0684, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_f1": 0.4087602196471012, |
|
"eval_loss": 1.1329175233840942, |
|
"eval_runtime": 46.5357, |
|
"eval_samples_per_second": 139.291, |
|
"eval_steps_per_second": 8.724, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.6676084427455544e-05, |
|
"loss": 1.0633, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_f1": 0.39803057267400827, |
|
"eval_loss": 1.1004011631011963, |
|
"eval_runtime": 46.1396, |
|
"eval_samples_per_second": 140.487, |
|
"eval_steps_per_second": 8.799, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.6468339704171513e-05, |
|
"loss": 1.0739, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_f1": 0.39430507046678914, |
|
"eval_loss": 1.090652585029602, |
|
"eval_runtime": 46.3294, |
|
"eval_samples_per_second": 139.911, |
|
"eval_steps_per_second": 8.763, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.626059498088749e-05, |
|
"loss": 1.0646, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_f1": 0.4204523467443143, |
|
"eval_loss": 1.1204614639282227, |
|
"eval_runtime": 46.2687, |
|
"eval_samples_per_second": 140.095, |
|
"eval_steps_per_second": 8.775, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.605285025760346e-05, |
|
"loss": 1.0581, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_f1": 0.3934072615715215, |
|
"eval_loss": 1.100487232208252, |
|
"eval_runtime": 46.7591, |
|
"eval_samples_per_second": 138.625, |
|
"eval_steps_per_second": 8.683, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.5845105534319435e-05, |
|
"loss": 1.0659, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_f1": 0.3959022659523447, |
|
"eval_loss": 1.0948997735977173, |
|
"eval_runtime": 46.4254, |
|
"eval_samples_per_second": 139.622, |
|
"eval_steps_per_second": 8.745, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.5637360811035404e-05, |
|
"loss": 1.0573, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_f1": 0.4038876072579517, |
|
"eval_loss": 1.0948611497879028, |
|
"eval_runtime": 46.9168, |
|
"eval_samples_per_second": 138.159, |
|
"eval_steps_per_second": 8.654, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.542961608775137e-05, |
|
"loss": 1.0725, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_f1": 0.3986235498061616, |
|
"eval_loss": 1.1076061725616455, |
|
"eval_runtime": 46.337, |
|
"eval_samples_per_second": 139.888, |
|
"eval_steps_per_second": 8.762, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.522187136446734e-05, |
|
"loss": 1.0453, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_f1": 0.39752123118599975, |
|
"eval_loss": 1.0838735103607178, |
|
"eval_runtime": 46.3441, |
|
"eval_samples_per_second": 139.867, |
|
"eval_steps_per_second": 8.761, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.501412664118332e-05, |
|
"loss": 1.0594, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.3901284493425606, |
|
"eval_loss": 1.0847594738006592, |
|
"eval_runtime": 45.6188, |
|
"eval_samples_per_second": 142.091, |
|
"eval_steps_per_second": 8.9, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.480638191789929e-05, |
|
"loss": 0.9487, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_f1": 0.43667878762469675, |
|
"eval_loss": 1.1431002616882324, |
|
"eval_runtime": 46.1871, |
|
"eval_samples_per_second": 140.342, |
|
"eval_steps_per_second": 8.79, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.4598637194615264e-05, |
|
"loss": 0.9704, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_f1": 0.39215933654644525, |
|
"eval_loss": 1.1027016639709473, |
|
"eval_runtime": 45.9637, |
|
"eval_samples_per_second": 141.024, |
|
"eval_steps_per_second": 8.833, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.439089247133123e-05, |
|
"loss": 0.9469, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_f1": 0.4076438023703015, |
|
"eval_loss": 1.1772775650024414, |
|
"eval_runtime": 45.9053, |
|
"eval_samples_per_second": 141.204, |
|
"eval_steps_per_second": 8.844, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.41831477480472e-05, |
|
"loss": 0.9325, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_f1": 0.4386204591653561, |
|
"eval_loss": 1.1739530563354492, |
|
"eval_runtime": 45.6601, |
|
"eval_samples_per_second": 141.962, |
|
"eval_steps_per_second": 8.892, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.397540302476317e-05, |
|
"loss": 0.9393, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_f1": 0.4275788122291597, |
|
"eval_loss": 1.1776121854782104, |
|
"eval_runtime": 45.9594, |
|
"eval_samples_per_second": 141.037, |
|
"eval_steps_per_second": 8.834, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.376765830147914e-05, |
|
"loss": 0.9358, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_f1": 0.402493107642323, |
|
"eval_loss": 1.1454391479492188, |
|
"eval_runtime": 46.4333, |
|
"eval_samples_per_second": 139.598, |
|
"eval_steps_per_second": 8.744, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.355991357819512e-05, |
|
"loss": 0.9276, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_f1": 0.4309881707776124, |
|
"eval_loss": 1.1370099782943726, |
|
"eval_runtime": 46.0386, |
|
"eval_samples_per_second": 140.795, |
|
"eval_steps_per_second": 8.819, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.3352168854911086e-05, |
|
"loss": 0.9749, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_f1": 0.42666296909338014, |
|
"eval_loss": 1.147721290588379, |
|
"eval_runtime": 46.5256, |
|
"eval_samples_per_second": 139.321, |
|
"eval_steps_per_second": 8.726, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.314442413162706e-05, |
|
"loss": 0.9584, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_f1": 0.3917665752135426, |
|
"eval_loss": 1.1466563940048218, |
|
"eval_runtime": 45.7559, |
|
"eval_samples_per_second": 141.665, |
|
"eval_steps_per_second": 8.873, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.293667940834303e-05, |
|
"loss": 0.9458, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_f1": 0.38855363975832957, |
|
"eval_loss": 1.1946083307266235, |
|
"eval_runtime": 46.3569, |
|
"eval_samples_per_second": 139.828, |
|
"eval_steps_per_second": 8.758, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.2728934685059e-05, |
|
"loss": 0.9615, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_f1": 0.4006260567973624, |
|
"eval_loss": 1.1700124740600586, |
|
"eval_runtime": 46.2179, |
|
"eval_samples_per_second": 140.249, |
|
"eval_steps_per_second": 8.784, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.252118996177497e-05, |
|
"loss": 0.949, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_f1": 0.39774908999391234, |
|
"eval_loss": 1.1761705875396729, |
|
"eval_runtime": 46.6777, |
|
"eval_samples_per_second": 138.867, |
|
"eval_steps_per_second": 8.698, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.2313445238490946e-05, |
|
"loss": 0.9424, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_f1": 0.4174117585364745, |
|
"eval_loss": 1.165438175201416, |
|
"eval_runtime": 45.4426, |
|
"eval_samples_per_second": 142.641, |
|
"eval_steps_per_second": 8.934, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.2105700515206915e-05, |
|
"loss": 0.947, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_f1": 0.3867006123407769, |
|
"eval_loss": 1.1531673669815063, |
|
"eval_runtime": 46.295, |
|
"eval_samples_per_second": 140.015, |
|
"eval_steps_per_second": 8.77, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.189795579192289e-05, |
|
"loss": 0.938, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_f1": 0.44235664423629284, |
|
"eval_loss": 1.1730421781539917, |
|
"eval_runtime": 45.858, |
|
"eval_samples_per_second": 141.349, |
|
"eval_steps_per_second": 8.853, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.1690211068638854e-05, |
|
"loss": 0.9486, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_f1": 0.41240105931665383, |
|
"eval_loss": 1.1419258117675781, |
|
"eval_runtime": 46.2869, |
|
"eval_samples_per_second": 140.04, |
|
"eval_steps_per_second": 8.771, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.148246634535483e-05, |
|
"loss": 0.9464, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_f1": 0.4092001043387523, |
|
"eval_loss": 1.2019739151000977, |
|
"eval_runtime": 46.6507, |
|
"eval_samples_per_second": 138.948, |
|
"eval_steps_per_second": 8.703, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.12747216220708e-05, |
|
"loss": 0.933, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_f1": 0.4176306552081639, |
|
"eval_loss": 1.1400264501571655, |
|
"eval_runtime": 46.9566, |
|
"eval_samples_per_second": 138.042, |
|
"eval_steps_per_second": 8.646, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.1066976898786775e-05, |
|
"loss": 0.9544, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_f1": 0.430552427353769, |
|
"eval_loss": 1.1604799032211304, |
|
"eval_runtime": 47.2589, |
|
"eval_samples_per_second": 137.159, |
|
"eval_steps_per_second": 8.591, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.0859232175502744e-05, |
|
"loss": 0.9312, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_f1": 0.39462194466074857, |
|
"eval_loss": 1.1545989513397217, |
|
"eval_runtime": 46.5344, |
|
"eval_samples_per_second": 139.295, |
|
"eval_steps_per_second": 8.725, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 4.065148745221872e-05, |
|
"loss": 0.9458, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_f1": 0.44503977528303557, |
|
"eval_loss": 1.1579736471176147, |
|
"eval_runtime": 47.0732, |
|
"eval_samples_per_second": 137.7, |
|
"eval_steps_per_second": 8.625, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 4.044374272893468e-05, |
|
"loss": 0.9463, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_f1": 0.41566594457572864, |
|
"eval_loss": 1.1216946840286255, |
|
"eval_runtime": 46.6125, |
|
"eval_samples_per_second": 139.061, |
|
"eval_steps_per_second": 8.71, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.023599800565066e-05, |
|
"loss": 0.9292, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_f1": 0.3965530806436371, |
|
"eval_loss": 1.1553888320922852, |
|
"eval_runtime": 46.7596, |
|
"eval_samples_per_second": 138.624, |
|
"eval_steps_per_second": 8.683, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.002825328236663e-05, |
|
"loss": 0.9286, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_f1": 0.4010412760625175, |
|
"eval_loss": 1.1262454986572266, |
|
"eval_runtime": 46.3591, |
|
"eval_samples_per_second": 139.822, |
|
"eval_steps_per_second": 8.758, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.9820508559082604e-05, |
|
"loss": 0.7944, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_f1": 0.43825195451845356, |
|
"eval_loss": 1.2961622476577759, |
|
"eval_runtime": 46.8176, |
|
"eval_samples_per_second": 138.452, |
|
"eval_steps_per_second": 8.672, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.961276383579857e-05, |
|
"loss": 0.8003, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_f1": 0.4380376882658572, |
|
"eval_loss": 1.268869161605835, |
|
"eval_runtime": 52.7668, |
|
"eval_samples_per_second": 122.842, |
|
"eval_steps_per_second": 7.694, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.940501911251455e-05, |
|
"loss": 0.7792, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_f1": 0.4432590805872725, |
|
"eval_loss": 1.2123405933380127, |
|
"eval_runtime": 46.7198, |
|
"eval_samples_per_second": 138.742, |
|
"eval_steps_per_second": 8.69, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.919727438923051e-05, |
|
"loss": 0.79, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_f1": 0.44452383932580275, |
|
"eval_loss": 1.2517160177230835, |
|
"eval_runtime": 46.1817, |
|
"eval_samples_per_second": 140.359, |
|
"eval_steps_per_second": 8.791, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.898952966594649e-05, |
|
"loss": 0.7984, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_f1": 0.4221078291607028, |
|
"eval_loss": 1.2184810638427734, |
|
"eval_runtime": 46.3545, |
|
"eval_samples_per_second": 139.835, |
|
"eval_steps_per_second": 8.759, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.878178494266246e-05, |
|
"loss": 0.7952, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_f1": 0.439579966107181, |
|
"eval_loss": 1.2801449298858643, |
|
"eval_runtime": 46.2023, |
|
"eval_samples_per_second": 140.296, |
|
"eval_steps_per_second": 8.787, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.857404021937843e-05, |
|
"loss": 0.8055, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_f1": 0.4544774942196613, |
|
"eval_loss": 1.2639812231063843, |
|
"eval_runtime": 46.0512, |
|
"eval_samples_per_second": 140.757, |
|
"eval_steps_per_second": 8.816, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.83662954960944e-05, |
|
"loss": 0.8084, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_f1": 0.44333487259913823, |
|
"eval_loss": 1.228055477142334, |
|
"eval_runtime": 46.2747, |
|
"eval_samples_per_second": 140.077, |
|
"eval_steps_per_second": 8.774, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.815855077281037e-05, |
|
"loss": 0.7904, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_f1": 0.43992159119888363, |
|
"eval_loss": 1.2493727207183838, |
|
"eval_runtime": 46.321, |
|
"eval_samples_per_second": 139.937, |
|
"eval_steps_per_second": 8.765, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.795080604952634e-05, |
|
"loss": 0.8057, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_f1": 0.4114745835354577, |
|
"eval_loss": 1.2447556257247925, |
|
"eval_runtime": 46.3954, |
|
"eval_samples_per_second": 139.712, |
|
"eval_steps_per_second": 8.751, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.774306132624231e-05, |
|
"loss": 0.8001, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_f1": 0.41230473935117545, |
|
"eval_loss": 1.2784521579742432, |
|
"eval_runtime": 46.222, |
|
"eval_samples_per_second": 140.236, |
|
"eval_steps_per_second": 8.784, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.7535316602958286e-05, |
|
"loss": 0.8293, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_f1": 0.4303954902219652, |
|
"eval_loss": 1.1889426708221436, |
|
"eval_runtime": 45.554, |
|
"eval_samples_per_second": 142.293, |
|
"eval_steps_per_second": 8.913, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.7327571879674255e-05, |
|
"loss": 0.8194, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_f1": 0.4301727119748369, |
|
"eval_loss": 1.2015577554702759, |
|
"eval_runtime": 46.1992, |
|
"eval_samples_per_second": 140.305, |
|
"eval_steps_per_second": 8.788, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.711982715639023e-05, |
|
"loss": 0.8028, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_f1": 0.44964098289977084, |
|
"eval_loss": 1.2026257514953613, |
|
"eval_runtime": 45.6586, |
|
"eval_samples_per_second": 141.967, |
|
"eval_steps_per_second": 8.892, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.69120824331062e-05, |
|
"loss": 0.8123, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_f1": 0.4305026988222712, |
|
"eval_loss": 1.2430651187896729, |
|
"eval_runtime": 45.6789, |
|
"eval_samples_per_second": 141.904, |
|
"eval_steps_per_second": 8.888, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.670433770982217e-05, |
|
"loss": 0.7941, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_f1": 0.4185262770510854, |
|
"eval_loss": 1.2300126552581787, |
|
"eval_runtime": 46.2407, |
|
"eval_samples_per_second": 140.179, |
|
"eval_steps_per_second": 8.78, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.649659298653814e-05, |
|
"loss": 0.7815, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_f1": 0.42807790167507703, |
|
"eval_loss": 1.3011759519577026, |
|
"eval_runtime": 45.2153, |
|
"eval_samples_per_second": 143.358, |
|
"eval_steps_per_second": 8.979, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.6288848263254115e-05, |
|
"loss": 0.8081, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_f1": 0.440687373165412, |
|
"eval_loss": 1.253546953201294, |
|
"eval_runtime": 45.7395, |
|
"eval_samples_per_second": 141.715, |
|
"eval_steps_per_second": 8.876, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6081103539970084e-05, |
|
"loss": 0.8086, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_f1": 0.44928394998593935, |
|
"eval_loss": 1.2568650245666504, |
|
"eval_runtime": 45.7963, |
|
"eval_samples_per_second": 141.54, |
|
"eval_steps_per_second": 8.865, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.587335881668606e-05, |
|
"loss": 0.7858, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_f1": 0.43231367666538134, |
|
"eval_loss": 1.2376387119293213, |
|
"eval_runtime": 46.5395, |
|
"eval_samples_per_second": 139.279, |
|
"eval_steps_per_second": 8.724, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.566561409340203e-05, |
|
"loss": 0.8065, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_f1": 0.42028961729158787, |
|
"eval_loss": 1.2222144603729248, |
|
"eval_runtime": 46.2907, |
|
"eval_samples_per_second": 140.028, |
|
"eval_steps_per_second": 8.771, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.5457869370118e-05, |
|
"loss": 0.7991, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_f1": 0.4231707776654273, |
|
"eval_loss": 1.250239372253418, |
|
"eval_runtime": 45.795, |
|
"eval_samples_per_second": 141.544, |
|
"eval_steps_per_second": 8.866, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.525012464683397e-05, |
|
"loss": 0.816, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_f1": 0.4231928313001403, |
|
"eval_loss": 1.2436952590942383, |
|
"eval_runtime": 45.9669, |
|
"eval_samples_per_second": 141.014, |
|
"eval_steps_per_second": 8.832, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.5042379923549944e-05, |
|
"loss": 0.8093, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_f1": 0.40722455645687655, |
|
"eval_loss": 1.1901623010635376, |
|
"eval_runtime": 45.8966, |
|
"eval_samples_per_second": 141.231, |
|
"eval_steps_per_second": 8.846, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.483463520026591e-05, |
|
"loss": 0.6567, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_f1": 0.4358392298680451, |
|
"eval_loss": 1.4940779209136963, |
|
"eval_runtime": 46.3646, |
|
"eval_samples_per_second": 139.805, |
|
"eval_steps_per_second": 8.757, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.462689047698189e-05, |
|
"loss": 0.6304, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_f1": 0.4248894510967205, |
|
"eval_loss": 1.471817135810852, |
|
"eval_runtime": 46.8512, |
|
"eval_samples_per_second": 138.353, |
|
"eval_steps_per_second": 8.666, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.441914575369786e-05, |
|
"loss": 0.6454, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_f1": 0.427949493239817, |
|
"eval_loss": 1.484312891960144, |
|
"eval_runtime": 46.3345, |
|
"eval_samples_per_second": 139.896, |
|
"eval_steps_per_second": 8.762, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.421140103041383e-05, |
|
"loss": 0.6654, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_f1": 0.43504779862979137, |
|
"eval_loss": 1.4933797121047974, |
|
"eval_runtime": 45.9895, |
|
"eval_samples_per_second": 140.945, |
|
"eval_steps_per_second": 8.828, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.40036563071298e-05, |
|
"loss": 0.6478, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_f1": 0.4310701120380386, |
|
"eval_loss": 1.4152840375900269, |
|
"eval_runtime": 46.2675, |
|
"eval_samples_per_second": 140.098, |
|
"eval_steps_per_second": 8.775, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.379591158384577e-05, |
|
"loss": 0.637, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_f1": 0.44398856573642115, |
|
"eval_loss": 1.3994076251983643, |
|
"eval_runtime": 45.8114, |
|
"eval_samples_per_second": 141.493, |
|
"eval_steps_per_second": 8.862, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.358816686056174e-05, |
|
"loss": 0.6398, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_f1": 0.42669623237681525, |
|
"eval_loss": 1.5294607877731323, |
|
"eval_runtime": 46.4175, |
|
"eval_samples_per_second": 139.646, |
|
"eval_steps_per_second": 8.747, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.338042213727772e-05, |
|
"loss": 0.6703, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_f1": 0.4287387139396017, |
|
"eval_loss": 1.3941184282302856, |
|
"eval_runtime": 46.3839, |
|
"eval_samples_per_second": 139.747, |
|
"eval_steps_per_second": 8.753, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.317267741399369e-05, |
|
"loss": 0.6442, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_f1": 0.4371709367951578, |
|
"eval_loss": 1.388688564300537, |
|
"eval_runtime": 46.5417, |
|
"eval_samples_per_second": 139.273, |
|
"eval_steps_per_second": 8.723, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.296493269070966e-05, |
|
"loss": 0.6784, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_f1": 0.43751727834027077, |
|
"eval_loss": 1.3877114057540894, |
|
"eval_runtime": 45.9117, |
|
"eval_samples_per_second": 141.184, |
|
"eval_steps_per_second": 8.843, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.2757187967425626e-05, |
|
"loss": 0.6614, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_f1": 0.4372541317801921, |
|
"eval_loss": 1.4126884937286377, |
|
"eval_runtime": 46.4825, |
|
"eval_samples_per_second": 139.45, |
|
"eval_steps_per_second": 8.734, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.25494432441416e-05, |
|
"loss": 0.6864, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_f1": 0.42774626956090644, |
|
"eval_loss": 1.4882549047470093, |
|
"eval_runtime": 46.0334, |
|
"eval_samples_per_second": 140.811, |
|
"eval_steps_per_second": 8.82, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.234169852085757e-05, |
|
"loss": 0.6636, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_f1": 0.4248446501416097, |
|
"eval_loss": 1.3951656818389893, |
|
"eval_runtime": 45.7857, |
|
"eval_samples_per_second": 141.573, |
|
"eval_steps_per_second": 8.867, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.213395379757354e-05, |
|
"loss": 0.6801, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_f1": 0.41563433666965854, |
|
"eval_loss": 1.4469736814498901, |
|
"eval_runtime": 46.1733, |
|
"eval_samples_per_second": 140.384, |
|
"eval_steps_per_second": 8.793, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.1926209074289517e-05, |
|
"loss": 0.6509, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_f1": 0.42981854634578975, |
|
"eval_loss": 1.3635119199752808, |
|
"eval_runtime": 46.4023, |
|
"eval_samples_per_second": 139.691, |
|
"eval_steps_per_second": 8.75, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 3.1718464351005486e-05, |
|
"loss": 0.6776, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_f1": 0.4345867863354599, |
|
"eval_loss": 1.3212920427322388, |
|
"eval_runtime": 46.0513, |
|
"eval_samples_per_second": 140.756, |
|
"eval_steps_per_second": 8.816, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.1510719627721455e-05, |
|
"loss": 0.6686, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_f1": 0.4283615179772263, |
|
"eval_loss": 1.3529335260391235, |
|
"eval_runtime": 45.8914, |
|
"eval_samples_per_second": 141.247, |
|
"eval_steps_per_second": 8.847, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.1302974904437424e-05, |
|
"loss": 0.6696, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_f1": 0.42784477364430307, |
|
"eval_loss": 1.3639956712722778, |
|
"eval_runtime": 45.6293, |
|
"eval_samples_per_second": 142.058, |
|
"eval_steps_per_second": 8.898, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 3.10952301811534e-05, |
|
"loss": 0.6624, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_f1": 0.4325377081208613, |
|
"eval_loss": 1.409765601158142, |
|
"eval_runtime": 47.4821, |
|
"eval_samples_per_second": 136.515, |
|
"eval_steps_per_second": 8.551, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 3.088748545786937e-05, |
|
"loss": 0.6876, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_f1": 0.4282077409143604, |
|
"eval_loss": 1.4017492532730103, |
|
"eval_runtime": 46.1534, |
|
"eval_samples_per_second": 140.445, |
|
"eval_steps_per_second": 8.797, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.0679740734585346e-05, |
|
"loss": 0.671, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"eval_f1": 0.4172019162341494, |
|
"eval_loss": 1.3028308153152466, |
|
"eval_runtime": 46.2659, |
|
"eval_samples_per_second": 140.103, |
|
"eval_steps_per_second": 8.775, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 3.047199601130131e-05, |
|
"loss": 0.68, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_f1": 0.43831774420843844, |
|
"eval_loss": 1.3964955806732178, |
|
"eval_runtime": 45.9895, |
|
"eval_samples_per_second": 140.945, |
|
"eval_steps_per_second": 8.828, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 3.0264251288017287e-05, |
|
"loss": 0.6715, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_f1": 0.4301624961844753, |
|
"eval_loss": 1.3572640419006348, |
|
"eval_runtime": 45.9503, |
|
"eval_samples_per_second": 141.065, |
|
"eval_steps_per_second": 8.836, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 3.0056506564733257e-05, |
|
"loss": 0.697, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_f1": 0.4253265083713992, |
|
"eval_loss": 1.3642019033432007, |
|
"eval_runtime": 46.1149, |
|
"eval_samples_per_second": 140.562, |
|
"eval_steps_per_second": 8.804, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 2.984876184144923e-05, |
|
"loss": 0.5631, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_f1": 0.433265159904986, |
|
"eval_loss": 1.5990760326385498, |
|
"eval_runtime": 45.3111, |
|
"eval_samples_per_second": 143.055, |
|
"eval_steps_per_second": 8.96, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.96410171181652e-05, |
|
"loss": 0.5151, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_f1": 0.4295485993747164, |
|
"eval_loss": 1.6384857892990112, |
|
"eval_runtime": 46.4055, |
|
"eval_samples_per_second": 139.682, |
|
"eval_steps_per_second": 8.749, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.943327239488117e-05, |
|
"loss": 0.5348, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_f1": 0.4240371984001696, |
|
"eval_loss": 1.5903598070144653, |
|
"eval_runtime": 45.9488, |
|
"eval_samples_per_second": 141.07, |
|
"eval_steps_per_second": 8.836, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 2.922552767159714e-05, |
|
"loss": 0.5288, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_f1": 0.42810904253980414, |
|
"eval_loss": 1.6144169569015503, |
|
"eval_runtime": 46.451, |
|
"eval_samples_per_second": 139.545, |
|
"eval_steps_per_second": 8.74, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 2.9017782948313117e-05, |
|
"loss": 0.5422, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_f1": 0.4302607705379434, |
|
"eval_loss": 1.7097866535186768, |
|
"eval_runtime": 46.1436, |
|
"eval_samples_per_second": 140.475, |
|
"eval_steps_per_second": 8.799, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.8810038225029086e-05, |
|
"loss": 0.548, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_f1": 0.4281896068096123, |
|
"eval_loss": 1.573617935180664, |
|
"eval_runtime": 46.7725, |
|
"eval_samples_per_second": 138.586, |
|
"eval_steps_per_second": 8.68, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.860229350174506e-05, |
|
"loss": 0.5169, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_f1": 0.4336268684648746, |
|
"eval_loss": 1.5596050024032593, |
|
"eval_runtime": 45.9526, |
|
"eval_samples_per_second": 141.058, |
|
"eval_steps_per_second": 8.835, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.8394548778461028e-05, |
|
"loss": 0.5327, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_f1": 0.4223419321700975, |
|
"eval_loss": 1.6166974306106567, |
|
"eval_runtime": 46.03, |
|
"eval_samples_per_second": 140.821, |
|
"eval_steps_per_second": 8.82, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.8186804055177e-05, |
|
"loss": 0.5343, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_f1": 0.4428612616903695, |
|
"eval_loss": 1.7605165243148804, |
|
"eval_runtime": 45.6649, |
|
"eval_samples_per_second": 141.947, |
|
"eval_steps_per_second": 8.891, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.797905933189297e-05, |
|
"loss": 0.5478, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_f1": 0.44071411652178355, |
|
"eval_loss": 1.6004695892333984, |
|
"eval_runtime": 45.993, |
|
"eval_samples_per_second": 140.935, |
|
"eval_steps_per_second": 8.827, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.7771314608608946e-05, |
|
"loss": 0.5489, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_f1": 0.44084931155643786, |
|
"eval_loss": 1.642219066619873, |
|
"eval_runtime": 45.869, |
|
"eval_samples_per_second": 141.316, |
|
"eval_steps_per_second": 8.851, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.7563569885324915e-05, |
|
"loss": 0.5388, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_f1": 0.4372812211041268, |
|
"eval_loss": 1.7352898120880127, |
|
"eval_runtime": 46.4558, |
|
"eval_samples_per_second": 139.53, |
|
"eval_steps_per_second": 8.739, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 2.7355825162040887e-05, |
|
"loss": 0.5312, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_f1": 0.42873515159285114, |
|
"eval_loss": 1.6332671642303467, |
|
"eval_runtime": 46.1955, |
|
"eval_samples_per_second": 140.317, |
|
"eval_steps_per_second": 8.789, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 2.7148080438756857e-05, |
|
"loss": 0.5369, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_f1": 0.4392093355525315, |
|
"eval_loss": 1.5618759393692017, |
|
"eval_runtime": 47.5099, |
|
"eval_samples_per_second": 136.435, |
|
"eval_steps_per_second": 8.546, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 2.694033571547283e-05, |
|
"loss": 0.5475, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_f1": 0.42820616668057093, |
|
"eval_loss": 1.583003282546997, |
|
"eval_runtime": 46.2808, |
|
"eval_samples_per_second": 140.058, |
|
"eval_steps_per_second": 8.773, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.67325909921888e-05, |
|
"loss": 0.5622, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_f1": 0.4405394714296675, |
|
"eval_loss": 1.5289151668548584, |
|
"eval_runtime": 46.2454, |
|
"eval_samples_per_second": 140.165, |
|
"eval_steps_per_second": 8.779, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 2.6524846268904768e-05, |
|
"loss": 0.5662, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_f1": 0.43493209599396276, |
|
"eval_loss": 1.5689671039581299, |
|
"eval_runtime": 46.2429, |
|
"eval_samples_per_second": 140.173, |
|
"eval_steps_per_second": 8.78, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.6317101545620744e-05, |
|
"loss": 0.5373, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_f1": 0.4302946240864107, |
|
"eval_loss": 1.6275018453598022, |
|
"eval_runtime": 46.0239, |
|
"eval_samples_per_second": 140.84, |
|
"eval_steps_per_second": 8.821, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.610935682233671e-05, |
|
"loss": 0.5584, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_f1": 0.43485188110217615, |
|
"eval_loss": 1.7044297456741333, |
|
"eval_runtime": 46.2954, |
|
"eval_samples_per_second": 140.014, |
|
"eval_steps_per_second": 8.77, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 2.5901612099052686e-05, |
|
"loss": 0.5484, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_f1": 0.43712471779629325, |
|
"eval_loss": 1.6315213441848755, |
|
"eval_runtime": 46.0252, |
|
"eval_samples_per_second": 140.836, |
|
"eval_steps_per_second": 8.821, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.5693867375768655e-05, |
|
"loss": 0.5475, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_f1": 0.446221258616921, |
|
"eval_loss": 1.5129351615905762, |
|
"eval_runtime": 46.4009, |
|
"eval_samples_per_second": 139.696, |
|
"eval_steps_per_second": 8.75, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.5486122652484628e-05, |
|
"loss": 0.5551, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_f1": 0.4409313233994151, |
|
"eval_loss": 1.637054204940796, |
|
"eval_runtime": 46.0061, |
|
"eval_samples_per_second": 140.894, |
|
"eval_steps_per_second": 8.825, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 2.5278377929200597e-05, |
|
"loss": 0.558, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_f1": 0.4335536795951597, |
|
"eval_loss": 1.5173062086105347, |
|
"eval_runtime": 46.0786, |
|
"eval_samples_per_second": 140.673, |
|
"eval_steps_per_second": 8.811, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.5070633205916573e-05, |
|
"loss": 0.5553, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_f1": 0.4349357425201839, |
|
"eval_loss": 1.564207911491394, |
|
"eval_runtime": 45.533, |
|
"eval_samples_per_second": 142.358, |
|
"eval_steps_per_second": 8.917, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.4862888482632542e-05, |
|
"loss": 0.4491, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_f1": 0.43614659724704335, |
|
"eval_loss": 1.9311244487762451, |
|
"eval_runtime": 46.1968, |
|
"eval_samples_per_second": 140.313, |
|
"eval_steps_per_second": 8.788, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.4655143759348515e-05, |
|
"loss": 0.403, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_f1": 0.4393254273429778, |
|
"eval_loss": 2.0766100883483887, |
|
"eval_runtime": 45.9492, |
|
"eval_samples_per_second": 141.069, |
|
"eval_steps_per_second": 8.836, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 2.4447399036064487e-05, |
|
"loss": 0.4233, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_f1": 0.43421874747799094, |
|
"eval_loss": 2.0252885818481445, |
|
"eval_runtime": 46.6362, |
|
"eval_samples_per_second": 138.991, |
|
"eval_steps_per_second": 8.706, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 2.4239654312780457e-05, |
|
"loss": 0.4412, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_f1": 0.43655258613920295, |
|
"eval_loss": 2.0584676265716553, |
|
"eval_runtime": 46.1023, |
|
"eval_samples_per_second": 140.6, |
|
"eval_steps_per_second": 8.807, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 2.403190958949643e-05, |
|
"loss": 0.4477, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_f1": 0.445008636219538, |
|
"eval_loss": 1.9808226823806763, |
|
"eval_runtime": 46.3962, |
|
"eval_samples_per_second": 139.71, |
|
"eval_steps_per_second": 8.751, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 2.3824164866212402e-05, |
|
"loss": 0.4497, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_f1": 0.44325755380966075, |
|
"eval_loss": 1.8606414794921875, |
|
"eval_runtime": 46.1573, |
|
"eval_samples_per_second": 140.433, |
|
"eval_steps_per_second": 8.796, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 2.361642014292837e-05, |
|
"loss": 0.4415, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_f1": 0.44300106609021345, |
|
"eval_loss": 2.060542106628418, |
|
"eval_runtime": 45.615, |
|
"eval_samples_per_second": 142.102, |
|
"eval_steps_per_second": 8.901, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 2.340867541964434e-05, |
|
"loss": 0.4655, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_f1": 0.4281324149819077, |
|
"eval_loss": 1.7479959726333618, |
|
"eval_runtime": 45.9941, |
|
"eval_samples_per_second": 140.931, |
|
"eval_steps_per_second": 8.827, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 2.3200930696360313e-05, |
|
"loss": 0.4395, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_f1": 0.4315119731593183, |
|
"eval_loss": 1.851706862449646, |
|
"eval_runtime": 45.6959, |
|
"eval_samples_per_second": 141.851, |
|
"eval_steps_per_second": 8.885, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 2.2993185973076282e-05, |
|
"loss": 0.4738, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_f1": 0.4246487511252701, |
|
"eval_loss": 1.7510011196136475, |
|
"eval_runtime": 46.0796, |
|
"eval_samples_per_second": 140.67, |
|
"eval_steps_per_second": 8.811, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 2.2785441249792255e-05, |
|
"loss": 0.455, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_f1": 0.42367965460597234, |
|
"eval_loss": 1.7951207160949707, |
|
"eval_runtime": 46.1571, |
|
"eval_samples_per_second": 140.433, |
|
"eval_steps_per_second": 8.796, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 2.2577696526508228e-05, |
|
"loss": 0.4494, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_f1": 0.4400121441304865, |
|
"eval_loss": 1.8601397275924683, |
|
"eval_runtime": 46.6249, |
|
"eval_samples_per_second": 139.024, |
|
"eval_steps_per_second": 8.708, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 2.2369951803224197e-05, |
|
"loss": 0.4364, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_f1": 0.4454509847031083, |
|
"eval_loss": 1.9597169160842896, |
|
"eval_runtime": 45.9584, |
|
"eval_samples_per_second": 141.041, |
|
"eval_steps_per_second": 8.834, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 2.216220707994017e-05, |
|
"loss": 0.4611, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_f1": 0.44059704673003397, |
|
"eval_loss": 1.899012804031372, |
|
"eval_runtime": 47.0101, |
|
"eval_samples_per_second": 137.885, |
|
"eval_steps_per_second": 8.636, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 2.1954462356656142e-05, |
|
"loss": 0.4434, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_f1": 0.44119414948642377, |
|
"eval_loss": 1.983310341835022, |
|
"eval_runtime": 45.9947, |
|
"eval_samples_per_second": 140.929, |
|
"eval_steps_per_second": 8.827, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 2.174671763337211e-05, |
|
"loss": 0.45, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_f1": 0.4426699728777116, |
|
"eval_loss": 1.8643006086349487, |
|
"eval_runtime": 46.7241, |
|
"eval_samples_per_second": 138.729, |
|
"eval_steps_per_second": 8.689, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.1538972910088084e-05, |
|
"loss": 0.4657, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_f1": 0.4459768786824306, |
|
"eval_loss": 1.9347878694534302, |
|
"eval_runtime": 46.4171, |
|
"eval_samples_per_second": 139.647, |
|
"eval_steps_per_second": 8.747, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 2.1331228186804057e-05, |
|
"loss": 0.4536, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_f1": 0.43967737232771253, |
|
"eval_loss": 1.9800372123718262, |
|
"eval_runtime": 46.5899, |
|
"eval_samples_per_second": 139.129, |
|
"eval_steps_per_second": 8.714, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 2.1123483463520026e-05, |
|
"loss": 0.4665, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"eval_f1": 0.43522687025203594, |
|
"eval_loss": 1.7668453454971313, |
|
"eval_runtime": 45.8529, |
|
"eval_samples_per_second": 141.365, |
|
"eval_steps_per_second": 8.854, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 2.0915738740236e-05, |
|
"loss": 0.4668, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_f1": 0.43357794081082646, |
|
"eval_loss": 1.8984841108322144, |
|
"eval_runtime": 46.4335, |
|
"eval_samples_per_second": 139.597, |
|
"eval_steps_per_second": 8.744, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 2.070799401695197e-05, |
|
"loss": 0.4622, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_f1": 0.4323966893074926, |
|
"eval_loss": 1.8252310752868652, |
|
"eval_runtime": 45.8779, |
|
"eval_samples_per_second": 141.288, |
|
"eval_steps_per_second": 8.85, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 2.050024929366794e-05, |
|
"loss": 0.4766, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_f1": 0.44080051637694073, |
|
"eval_loss": 1.7060314416885376, |
|
"eval_runtime": 46.3123, |
|
"eval_samples_per_second": 139.963, |
|
"eval_steps_per_second": 8.767, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 2.0292504570383913e-05, |
|
"loss": 0.4476, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_f1": 0.4385964321265162, |
|
"eval_loss": 1.8184629678726196, |
|
"eval_runtime": 45.9872, |
|
"eval_samples_per_second": 140.952, |
|
"eval_steps_per_second": 8.829, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 2.0084759847099886e-05, |
|
"loss": 0.4602, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_f1": 0.44088418061549356, |
|
"eval_loss": 1.7630596160888672, |
|
"eval_runtime": 46.6035, |
|
"eval_samples_per_second": 139.088, |
|
"eval_steps_per_second": 8.712, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 1.9877015123815855e-05, |
|
"loss": 0.3679, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_f1": 0.4346630256587338, |
|
"eval_loss": 2.1265206336975098, |
|
"eval_runtime": 46.1031, |
|
"eval_samples_per_second": 140.598, |
|
"eval_steps_per_second": 8.806, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.9669270400531827e-05, |
|
"loss": 0.3708, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"eval_f1": 0.4350030580705022, |
|
"eval_loss": 2.233613967895508, |
|
"eval_runtime": 46.3001, |
|
"eval_samples_per_second": 140.0, |
|
"eval_steps_per_second": 8.769, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 1.94615256772478e-05, |
|
"loss": 0.3761, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_f1": 0.4315408453045143, |
|
"eval_loss": 2.161686420440674, |
|
"eval_runtime": 46.0645, |
|
"eval_samples_per_second": 140.716, |
|
"eval_steps_per_second": 8.814, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 1.925378095396377e-05, |
|
"loss": 0.382, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"eval_f1": 0.4300930630733001, |
|
"eval_loss": 2.2093658447265625, |
|
"eval_runtime": 46.5249, |
|
"eval_samples_per_second": 139.323, |
|
"eval_steps_per_second": 8.727, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 1.9046036230679742e-05, |
|
"loss": 0.3606, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"eval_f1": 0.4263466007824531, |
|
"eval_loss": 2.422569751739502, |
|
"eval_runtime": 46.1496, |
|
"eval_samples_per_second": 140.456, |
|
"eval_steps_per_second": 8.797, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.8838291507395715e-05, |
|
"loss": 0.3929, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_f1": 0.42870899531401374, |
|
"eval_loss": 2.2339413166046143, |
|
"eval_runtime": 45.9626, |
|
"eval_samples_per_second": 141.028, |
|
"eval_steps_per_second": 8.833, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 1.8630546784111684e-05, |
|
"loss": 0.3761, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_f1": 0.43084241603610085, |
|
"eval_loss": 2.245156764984131, |
|
"eval_runtime": 45.9689, |
|
"eval_samples_per_second": 141.008, |
|
"eval_steps_per_second": 8.832, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 1.8422802060827657e-05, |
|
"loss": 0.3735, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_f1": 0.42730161492497354, |
|
"eval_loss": 2.375741720199585, |
|
"eval_runtime": 45.5018, |
|
"eval_samples_per_second": 142.456, |
|
"eval_steps_per_second": 8.923, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.821505733754363e-05, |
|
"loss": 0.3824, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_f1": 0.42850714365323117, |
|
"eval_loss": 2.4190187454223633, |
|
"eval_runtime": 46.0335, |
|
"eval_samples_per_second": 140.81, |
|
"eval_steps_per_second": 8.82, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.80073126142596e-05, |
|
"loss": 0.4198, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_f1": 0.41957506619020246, |
|
"eval_loss": 2.291776657104492, |
|
"eval_runtime": 45.9135, |
|
"eval_samples_per_second": 141.179, |
|
"eval_steps_per_second": 8.843, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 1.779956789097557e-05, |
|
"loss": 0.3759, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"eval_f1": 0.4248491736870143, |
|
"eval_loss": 2.2740871906280518, |
|
"eval_runtime": 46.8387, |
|
"eval_samples_per_second": 138.39, |
|
"eval_steps_per_second": 8.668, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 1.759182316769154e-05, |
|
"loss": 0.4006, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"eval_f1": 0.43213490469184823, |
|
"eval_loss": 2.3717033863067627, |
|
"eval_runtime": 46.2157, |
|
"eval_samples_per_second": 140.255, |
|
"eval_steps_per_second": 8.785, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 1.7384078444407513e-05, |
|
"loss": 0.374, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_f1": 0.42844878973090866, |
|
"eval_loss": 2.2131588459014893, |
|
"eval_runtime": 46.4565, |
|
"eval_samples_per_second": 139.528, |
|
"eval_steps_per_second": 8.739, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.7176333721123482e-05, |
|
"loss": 0.3985, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_f1": 0.430486194952956, |
|
"eval_loss": 2.2677221298217773, |
|
"eval_runtime": 45.8499, |
|
"eval_samples_per_second": 141.374, |
|
"eval_steps_per_second": 8.855, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 1.6968588997839455e-05, |
|
"loss": 0.3892, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"eval_f1": 0.4339078659073283, |
|
"eval_loss": 2.2518913745880127, |
|
"eval_runtime": 46.9488, |
|
"eval_samples_per_second": 138.065, |
|
"eval_steps_per_second": 8.648, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.6760844274555427e-05, |
|
"loss": 0.4071, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_f1": 0.4281750460163451, |
|
"eval_loss": 2.1826319694519043, |
|
"eval_runtime": 46.8418, |
|
"eval_samples_per_second": 138.381, |
|
"eval_steps_per_second": 8.667, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.6553099551271397e-05, |
|
"loss": 0.4054, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_f1": 0.425255645027199, |
|
"eval_loss": 2.1723647117614746, |
|
"eval_runtime": 46.2813, |
|
"eval_samples_per_second": 140.057, |
|
"eval_steps_per_second": 8.772, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 1.634535482798737e-05, |
|
"loss": 0.374, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"eval_f1": 0.4243673281546822, |
|
"eval_loss": 2.208036184310913, |
|
"eval_runtime": 45.8911, |
|
"eval_samples_per_second": 141.248, |
|
"eval_steps_per_second": 8.847, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 1.6137610104703342e-05, |
|
"loss": 0.4086, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_f1": 0.4345723086799732, |
|
"eval_loss": 2.182802438735962, |
|
"eval_runtime": 45.7981, |
|
"eval_samples_per_second": 141.534, |
|
"eval_steps_per_second": 8.865, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 1.592986538141931e-05, |
|
"loss": 0.3828, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"eval_f1": 0.43379920447502057, |
|
"eval_loss": 2.186859369277954, |
|
"eval_runtime": 45.8903, |
|
"eval_samples_per_second": 141.25, |
|
"eval_steps_per_second": 8.847, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 1.5722120658135284e-05, |
|
"loss": 0.4065, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"eval_f1": 0.43151860579932383, |
|
"eval_loss": 2.138597249984741, |
|
"eval_runtime": 46.0037, |
|
"eval_samples_per_second": 140.902, |
|
"eval_steps_per_second": 8.825, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1.5514375934851256e-05, |
|
"loss": 0.4058, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_f1": 0.43162515735240276, |
|
"eval_loss": 2.0976452827453613, |
|
"eval_runtime": 46.3993, |
|
"eval_samples_per_second": 139.7, |
|
"eval_steps_per_second": 8.75, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.5306631211567226e-05, |
|
"loss": 0.3873, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_f1": 0.4212864273818023, |
|
"eval_loss": 2.4226503372192383, |
|
"eval_runtime": 46.1616, |
|
"eval_samples_per_second": 140.42, |
|
"eval_steps_per_second": 8.795, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 1.5098886488283198e-05, |
|
"loss": 0.389, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_f1": 0.431436402933277, |
|
"eval_loss": 2.1598784923553467, |
|
"eval_runtime": 46.8062, |
|
"eval_samples_per_second": 138.486, |
|
"eval_steps_per_second": 8.674, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.489114176499917e-05, |
|
"loss": 0.3538, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_f1": 0.4350556588831767, |
|
"eval_loss": 2.304293155670166, |
|
"eval_runtime": 46.574, |
|
"eval_samples_per_second": 139.176, |
|
"eval_steps_per_second": 8.717, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.4683397041715142e-05, |
|
"loss": 0.3132, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_f1": 0.4266591713234993, |
|
"eval_loss": 2.650728702545166, |
|
"eval_runtime": 46.4231, |
|
"eval_samples_per_second": 139.629, |
|
"eval_steps_per_second": 8.746, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1.4475652318431113e-05, |
|
"loss": 0.3166, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_f1": 0.43607320504273067, |
|
"eval_loss": 2.6059470176696777, |
|
"eval_runtime": 45.9726, |
|
"eval_samples_per_second": 140.997, |
|
"eval_steps_per_second": 8.831, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 1.4267907595147084e-05, |
|
"loss": 0.3303, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"eval_f1": 0.43326161921977124, |
|
"eval_loss": 2.56927227973938, |
|
"eval_runtime": 46.099, |
|
"eval_samples_per_second": 140.611, |
|
"eval_steps_per_second": 8.807, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 1.4060162871863056e-05, |
|
"loss": 0.3031, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"eval_f1": 0.43263480546826966, |
|
"eval_loss": 2.6123645305633545, |
|
"eval_runtime": 46.1092, |
|
"eval_samples_per_second": 140.579, |
|
"eval_steps_per_second": 8.805, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 1.3852418148579027e-05, |
|
"loss": 0.3495, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"eval_f1": 0.42935020523616974, |
|
"eval_loss": 2.5714728832244873, |
|
"eval_runtime": 46.4434, |
|
"eval_samples_per_second": 139.568, |
|
"eval_steps_per_second": 8.742, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 1.3644673425294998e-05, |
|
"loss": 0.3364, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"eval_f1": 0.4343767918185199, |
|
"eval_loss": 2.5482189655303955, |
|
"eval_runtime": 45.6061, |
|
"eval_samples_per_second": 142.13, |
|
"eval_steps_per_second": 8.902, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 1.3436928702010971e-05, |
|
"loss": 0.3169, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_f1": 0.4326820837375438, |
|
"eval_loss": 2.533505916595459, |
|
"eval_runtime": 45.7021, |
|
"eval_samples_per_second": 141.832, |
|
"eval_steps_per_second": 8.884, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.3229183978726942e-05, |
|
"loss": 0.3306, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"eval_f1": 0.4336259208362092, |
|
"eval_loss": 2.505479097366333, |
|
"eval_runtime": 45.9282, |
|
"eval_samples_per_second": 141.133, |
|
"eval_steps_per_second": 8.84, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 1.3021439255442913e-05, |
|
"loss": 0.3238, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"eval_f1": 0.4290241175053658, |
|
"eval_loss": 2.64029598236084, |
|
"eval_runtime": 45.7506, |
|
"eval_samples_per_second": 141.681, |
|
"eval_steps_per_second": 8.874, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 1.2813694532158885e-05, |
|
"loss": 0.3488, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_f1": 0.4318857789484726, |
|
"eval_loss": 2.4507477283477783, |
|
"eval_runtime": 46.4161, |
|
"eval_samples_per_second": 139.65, |
|
"eval_steps_per_second": 8.747, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.2605949808874856e-05, |
|
"loss": 0.3423, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_f1": 0.42784593140742677, |
|
"eval_loss": 2.4914281368255615, |
|
"eval_runtime": 46.3911, |
|
"eval_samples_per_second": 139.725, |
|
"eval_steps_per_second": 8.752, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 1.2398205085590826e-05, |
|
"loss": 0.3356, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"eval_f1": 0.44002802787763307, |
|
"eval_loss": 2.723588466644287, |
|
"eval_runtime": 46.564, |
|
"eval_samples_per_second": 139.206, |
|
"eval_steps_per_second": 8.719, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 1.2190460362306798e-05, |
|
"loss": 0.3655, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"eval_f1": 0.4342107835672762, |
|
"eval_loss": 2.516355276107788, |
|
"eval_runtime": 46.0601, |
|
"eval_samples_per_second": 140.729, |
|
"eval_steps_per_second": 8.815, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 1.198271563902277e-05, |
|
"loss": 0.3445, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_f1": 0.4392682142110146, |
|
"eval_loss": 2.4509174823760986, |
|
"eval_runtime": 46.5389, |
|
"eval_samples_per_second": 139.281, |
|
"eval_steps_per_second": 8.724, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 1.177497091573874e-05, |
|
"loss": 0.3123, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_f1": 0.4346012419342983, |
|
"eval_loss": 2.605870008468628, |
|
"eval_runtime": 46.0454, |
|
"eval_samples_per_second": 140.774, |
|
"eval_steps_per_second": 8.817, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 1.1567226192454713e-05, |
|
"loss": 0.3286, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_f1": 0.42900413120162884, |
|
"eval_loss": 2.5746617317199707, |
|
"eval_runtime": 46.6316, |
|
"eval_samples_per_second": 139.005, |
|
"eval_steps_per_second": 8.707, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 1.1359481469170684e-05, |
|
"loss": 0.3308, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"eval_f1": 0.4348325522174462, |
|
"eval_loss": 2.6604156494140625, |
|
"eval_runtime": 46.0552, |
|
"eval_samples_per_second": 140.744, |
|
"eval_steps_per_second": 8.816, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 1.1151736745886655e-05, |
|
"loss": 0.3507, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"eval_f1": 0.4362724679273792, |
|
"eval_loss": 2.6467623710632324, |
|
"eval_runtime": 46.2441, |
|
"eval_samples_per_second": 140.169, |
|
"eval_steps_per_second": 8.78, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 1.0943992022602627e-05, |
|
"loss": 0.3392, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_f1": 0.43161760121865855, |
|
"eval_loss": 2.6293702125549316, |
|
"eval_runtime": 45.6229, |
|
"eval_samples_per_second": 142.078, |
|
"eval_steps_per_second": 8.899, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 1.0736247299318598e-05, |
|
"loss": 0.3885, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_f1": 0.42849041218645784, |
|
"eval_loss": 2.4070699214935303, |
|
"eval_runtime": 45.9402, |
|
"eval_samples_per_second": 141.096, |
|
"eval_steps_per_second": 8.838, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 1.052850257603457e-05, |
|
"loss": 0.3458, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"eval_f1": 0.42803465998306145, |
|
"eval_loss": 2.5539064407348633, |
|
"eval_runtime": 46.1313, |
|
"eval_samples_per_second": 140.512, |
|
"eval_steps_per_second": 8.801, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 1.032075785275054e-05, |
|
"loss": 0.3451, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"eval_f1": 0.4282701100981614, |
|
"eval_loss": 2.4033892154693604, |
|
"eval_runtime": 46.4312, |
|
"eval_samples_per_second": 139.604, |
|
"eval_steps_per_second": 8.744, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 1.0113013129466511e-05, |
|
"loss": 0.3394, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_f1": 0.43486076258951, |
|
"eval_loss": 2.6116716861724854, |
|
"eval_runtime": 45.9663, |
|
"eval_samples_per_second": 141.016, |
|
"eval_steps_per_second": 8.833, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.905268406182484e-06, |
|
"loss": 0.3275, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_f1": 0.4322422606334531, |
|
"eval_loss": 2.5991387367248535, |
|
"eval_runtime": 46.5777, |
|
"eval_samples_per_second": 139.165, |
|
"eval_steps_per_second": 8.717, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 9.697523682898455e-06, |
|
"loss": 0.2676, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_f1": 0.42999721877624847, |
|
"eval_loss": 2.772134304046631, |
|
"eval_runtime": 46.0246, |
|
"eval_samples_per_second": 140.838, |
|
"eval_steps_per_second": 8.821, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.489778959614426e-06, |
|
"loss": 0.3013, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_f1": 0.43159307099859695, |
|
"eval_loss": 2.7766764163970947, |
|
"eval_runtime": 46.3012, |
|
"eval_samples_per_second": 139.996, |
|
"eval_steps_per_second": 8.769, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 9.282034236330398e-06, |
|
"loss": 0.283, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"eval_f1": 0.43429947059083457, |
|
"eval_loss": 2.723806142807007, |
|
"eval_runtime": 46.2805, |
|
"eval_samples_per_second": 140.059, |
|
"eval_steps_per_second": 8.773, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 9.07428951304637e-06, |
|
"loss": 0.2924, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_f1": 0.43274102934044933, |
|
"eval_loss": 2.7132375240325928, |
|
"eval_runtime": 46.1859, |
|
"eval_samples_per_second": 140.346, |
|
"eval_steps_per_second": 8.791, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 8.86654478976234e-06, |
|
"loss": 0.2817, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"eval_f1": 0.427616805108728, |
|
"eval_loss": 2.7119312286376953, |
|
"eval_runtime": 56.0523, |
|
"eval_samples_per_second": 115.642, |
|
"eval_steps_per_second": 7.243, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 8.658800066478313e-06, |
|
"loss": 0.2586, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"eval_f1": 0.4289585910640077, |
|
"eval_loss": 2.887305736541748, |
|
"eval_runtime": 46.8426, |
|
"eval_samples_per_second": 138.378, |
|
"eval_steps_per_second": 8.667, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 8.451055343194284e-06, |
|
"loss": 0.3051, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"eval_f1": 0.42582497432500993, |
|
"eval_loss": 2.7572007179260254, |
|
"eval_runtime": 45.6991, |
|
"eval_samples_per_second": 141.841, |
|
"eval_steps_per_second": 8.884, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 8.243310619910255e-06, |
|
"loss": 0.2916, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_f1": 0.43075118593793377, |
|
"eval_loss": 2.814906358718872, |
|
"eval_runtime": 46.2133, |
|
"eval_samples_per_second": 140.263, |
|
"eval_steps_per_second": 8.785, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 8.035565896626227e-06, |
|
"loss": 0.2948, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_f1": 0.4244624620028211, |
|
"eval_loss": 2.6767539978027344, |
|
"eval_runtime": 46.5451, |
|
"eval_samples_per_second": 139.263, |
|
"eval_steps_per_second": 8.723, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 7.827821173342198e-06, |
|
"loss": 0.277, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"eval_f1": 0.4273818288568198, |
|
"eval_loss": 2.727567672729492, |
|
"eval_runtime": 46.8521, |
|
"eval_samples_per_second": 138.35, |
|
"eval_steps_per_second": 8.666, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 7.620076450058169e-06, |
|
"loss": 0.2929, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"eval_f1": 0.4320719535965778, |
|
"eval_loss": 2.926513433456421, |
|
"eval_runtime": 47.1016, |
|
"eval_samples_per_second": 137.617, |
|
"eval_steps_per_second": 8.62, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 7.412331726774139e-06, |
|
"loss": 0.2893, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_f1": 0.4260519729309167, |
|
"eval_loss": 2.877741575241089, |
|
"eval_runtime": 45.6581, |
|
"eval_samples_per_second": 141.968, |
|
"eval_steps_per_second": 8.892, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.204587003490111e-06, |
|
"loss": 0.2808, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_f1": 0.4282218551070931, |
|
"eval_loss": 2.7379603385925293, |
|
"eval_runtime": 45.9855, |
|
"eval_samples_per_second": 140.957, |
|
"eval_steps_per_second": 8.829, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 6.996842280206083e-06, |
|
"loss": 0.2728, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"eval_f1": 0.43170842579550445, |
|
"eval_loss": 2.7965376377105713, |
|
"eval_runtime": 45.7627, |
|
"eval_samples_per_second": 141.644, |
|
"eval_steps_per_second": 8.872, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 6.789097556922054e-06, |
|
"loss": 0.2789, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"eval_f1": 0.4315708387912964, |
|
"eval_loss": 2.875927209854126, |
|
"eval_runtime": 45.9208, |
|
"eval_samples_per_second": 141.156, |
|
"eval_steps_per_second": 8.841, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 6.5813528336380256e-06, |
|
"loss": 0.3072, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"eval_f1": 0.42772350225777217, |
|
"eval_loss": 2.8333616256713867, |
|
"eval_runtime": 49.0657, |
|
"eval_samples_per_second": 132.109, |
|
"eval_steps_per_second": 8.275, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 6.373608110353997e-06, |
|
"loss": 0.2779, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"eval_f1": 0.4306437356669429, |
|
"eval_loss": 2.8720405101776123, |
|
"eval_runtime": 46.0481, |
|
"eval_samples_per_second": 140.766, |
|
"eval_steps_per_second": 8.817, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 6.165863387069968e-06, |
|
"loss": 0.2948, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"eval_f1": 0.42931126691296156, |
|
"eval_loss": 2.82370662689209, |
|
"eval_runtime": 45.6262, |
|
"eval_samples_per_second": 142.067, |
|
"eval_steps_per_second": 8.898, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 5.95811866378594e-06, |
|
"loss": 0.2917, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"eval_f1": 0.42987557908855206, |
|
"eval_loss": 2.785443067550659, |
|
"eval_runtime": 45.7985, |
|
"eval_samples_per_second": 141.533, |
|
"eval_steps_per_second": 8.865, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 5.750373940501912e-06, |
|
"loss": 0.2604, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"eval_f1": 0.4272210968347812, |
|
"eval_loss": 2.9237265586853027, |
|
"eval_runtime": 45.9348, |
|
"eval_samples_per_second": 141.113, |
|
"eval_steps_per_second": 8.839, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.542629217217883e-06, |
|
"loss": 0.3057, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_f1": 0.4287531329908217, |
|
"eval_loss": 2.8509225845336914, |
|
"eval_runtime": 46.4174, |
|
"eval_samples_per_second": 139.646, |
|
"eval_steps_per_second": 8.747, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 5.334884493933855e-06, |
|
"loss": 0.2853, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_f1": 0.4273342046666312, |
|
"eval_loss": 2.7482059001922607, |
|
"eval_runtime": 45.8656, |
|
"eval_samples_per_second": 141.326, |
|
"eval_steps_per_second": 8.852, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 5.1271397706498255e-06, |
|
"loss": 0.2946, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"eval_f1": 0.4272429839187248, |
|
"eval_loss": 2.86079740524292, |
|
"eval_runtime": 46.3563, |
|
"eval_samples_per_second": 139.83, |
|
"eval_steps_per_second": 8.758, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.919395047365797e-06, |
|
"loss": 0.2492, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_f1": 0.4263680579842533, |
|
"eval_loss": 3.0018138885498047, |
|
"eval_runtime": 46.2406, |
|
"eval_samples_per_second": 140.18, |
|
"eval_steps_per_second": 8.78, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 4.711650324081768e-06, |
|
"loss": 0.2471, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_f1": 0.42881222429550997, |
|
"eval_loss": 3.014409303665161, |
|
"eval_runtime": 46.1142, |
|
"eval_samples_per_second": 140.564, |
|
"eval_steps_per_second": 8.804, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 4.50390560079774e-06, |
|
"loss": 0.2497, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_f1": 0.4299801659722133, |
|
"eval_loss": 3.0282411575317383, |
|
"eval_runtime": 46.4599, |
|
"eval_samples_per_second": 139.518, |
|
"eval_steps_per_second": 8.739, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 4.296160877513712e-06, |
|
"loss": 0.2632, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"eval_f1": 0.4285290598627509, |
|
"eval_loss": 2.9987945556640625, |
|
"eval_runtime": 46.2719, |
|
"eval_samples_per_second": 140.085, |
|
"eval_steps_per_second": 8.774, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 4.088416154229683e-06, |
|
"loss": 0.2292, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"eval_f1": 0.4295693743332262, |
|
"eval_loss": 2.9812545776367188, |
|
"eval_runtime": 46.0798, |
|
"eval_samples_per_second": 140.669, |
|
"eval_steps_per_second": 8.811, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 3.8806714309456546e-06, |
|
"loss": 0.2467, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"eval_f1": 0.4292138712207353, |
|
"eval_loss": 2.9471848011016846, |
|
"eval_runtime": 46.1858, |
|
"eval_samples_per_second": 140.346, |
|
"eval_steps_per_second": 8.791, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 3.672926707661625e-06, |
|
"loss": 0.2405, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"eval_f1": 0.4283879568475488, |
|
"eval_loss": 3.013239860534668, |
|
"eval_runtime": 46.341, |
|
"eval_samples_per_second": 139.876, |
|
"eval_steps_per_second": 8.761, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 3.465181984377597e-06, |
|
"loss": 0.2479, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"eval_f1": 0.43018803192623334, |
|
"eval_loss": 2.974679946899414, |
|
"eval_runtime": 46.0989, |
|
"eval_samples_per_second": 140.611, |
|
"eval_steps_per_second": 8.807, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 3.2574372610935682e-06, |
|
"loss": 0.2415, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"eval_f1": 0.43026318885711246, |
|
"eval_loss": 2.9601778984069824, |
|
"eval_runtime": 46.5726, |
|
"eval_samples_per_second": 139.18, |
|
"eval_steps_per_second": 8.718, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 3.0496925378095396e-06, |
|
"loss": 0.2387, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"eval_f1": 0.4279864446994403, |
|
"eval_loss": 2.995856285095215, |
|
"eval_runtime": 45.9971, |
|
"eval_samples_per_second": 140.922, |
|
"eval_steps_per_second": 8.827, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.8419478145255114e-06, |
|
"loss": 0.2005, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_f1": 0.4226136111773091, |
|
"eval_loss": 3.061025857925415, |
|
"eval_runtime": 46.3377, |
|
"eval_samples_per_second": 139.886, |
|
"eval_steps_per_second": 8.762, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 2.6342030912414823e-06, |
|
"loss": 0.2479, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_f1": 0.4310947389401135, |
|
"eval_loss": 3.0478320121765137, |
|
"eval_runtime": 46.1598, |
|
"eval_samples_per_second": 140.425, |
|
"eval_steps_per_second": 8.796, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 2.426458367957454e-06, |
|
"loss": 0.2649, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"eval_f1": 0.42937936025434953, |
|
"eval_loss": 2.964843511581421, |
|
"eval_runtime": 45.9242, |
|
"eval_samples_per_second": 141.146, |
|
"eval_steps_per_second": 8.841, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.2187136446734255e-06, |
|
"loss": 0.2659, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"eval_f1": 0.42954375020993524, |
|
"eval_loss": 2.9387714862823486, |
|
"eval_runtime": 46.5353, |
|
"eval_samples_per_second": 139.292, |
|
"eval_steps_per_second": 8.725, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 2.010968921389397e-06, |
|
"loss": 0.2723, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_f1": 0.43453017211998707, |
|
"eval_loss": 2.942929744720459, |
|
"eval_runtime": 45.4198, |
|
"eval_samples_per_second": 142.713, |
|
"eval_steps_per_second": 8.939, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 1.8032241981053682e-06, |
|
"loss": 0.2607, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"eval_f1": 0.43214152175935766, |
|
"eval_loss": 2.9575393199920654, |
|
"eval_runtime": 46.6531, |
|
"eval_samples_per_second": 138.94, |
|
"eval_steps_per_second": 8.703, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 1.5954794748213396e-06, |
|
"loss": 0.2284, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"eval_f1": 0.43057572034563696, |
|
"eval_loss": 2.9958791732788086, |
|
"eval_runtime": 45.9052, |
|
"eval_samples_per_second": 141.204, |
|
"eval_steps_per_second": 8.844, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 1.387734751537311e-06, |
|
"loss": 0.2703, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_f1": 0.4305604104069932, |
|
"eval_loss": 2.9565792083740234, |
|
"eval_runtime": 46.6048, |
|
"eval_samples_per_second": 139.084, |
|
"eval_steps_per_second": 8.712, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 1.1799900282532825e-06, |
|
"loss": 0.2358, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"eval_f1": 0.4299059329453134, |
|
"eval_loss": 2.9763364791870117, |
|
"eval_runtime": 45.9929, |
|
"eval_samples_per_second": 140.935, |
|
"eval_steps_per_second": 8.827, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 9.722453049692539e-07, |
|
"loss": 0.2633, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"eval_f1": 0.42872674431828584, |
|
"eval_loss": 2.9550485610961914, |
|
"eval_runtime": 46.191, |
|
"eval_samples_per_second": 140.33, |
|
"eval_steps_per_second": 8.79, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 7.645005816852252e-07, |
|
"loss": 0.2184, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_f1": 0.43016074486231115, |
|
"eval_loss": 2.9980921745300293, |
|
"eval_runtime": 46.1749, |
|
"eval_samples_per_second": 140.379, |
|
"eval_steps_per_second": 8.793, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 5.567558584011966e-07, |
|
"loss": 0.2299, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"eval_f1": 0.4306080177005011, |
|
"eval_loss": 3.0055530071258545, |
|
"eval_runtime": 46.0833, |
|
"eval_samples_per_second": 140.658, |
|
"eval_steps_per_second": 8.81, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 3.4901113511716807e-07, |
|
"loss": 0.2469, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"eval_f1": 0.4310614992045377, |
|
"eval_loss": 2.9983813762664795, |
|
"eval_runtime": 46.3038, |
|
"eval_samples_per_second": 139.989, |
|
"eval_steps_per_second": 8.768, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 1.4126641183313946e-07, |
|
"loss": 0.2741, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_f1": 0.43169668013947665, |
|
"eval_loss": 2.9964375495910645, |
|
"eval_runtime": 46.5054, |
|
"eval_samples_per_second": 139.382, |
|
"eval_steps_per_second": 8.73, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 120340, |
|
"total_flos": 5.252278015131096e+16, |
|
"train_loss": 0.5760954068351747, |
|
"train_runtime": 42398.6115, |
|
"train_samples_per_second": 22.706, |
|
"train_steps_per_second": 2.838 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 120340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 5.252278015131096e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|