{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 120340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.979225527671597e-05, "loss": 1.2637, "step": 500 }, { "epoch": 0.04, "eval_f1": 0.2863453369025287, "eval_loss": 1.204175591468811, "eval_runtime": 51.6606, "eval_samples_per_second": 125.473, "eval_steps_per_second": 7.859, "step": 500 }, { "epoch": 0.08, "learning_rate": 4.958451055343195e-05, "loss": 1.213, "step": 1000 }, { "epoch": 0.08, "eval_f1": 0.274772728922474, "eval_loss": 1.2542401552200317, "eval_runtime": 46.4286, "eval_samples_per_second": 139.612, "eval_steps_per_second": 8.745, "step": 1000 }, { "epoch": 0.12, "learning_rate": 4.937676583014792e-05, "loss": 1.1852, "step": 1500 }, { "epoch": 0.12, "eval_f1": 0.31238188693470137, "eval_loss": 1.1441909074783325, "eval_runtime": 46.2394, "eval_samples_per_second": 140.183, "eval_steps_per_second": 8.78, "step": 1500 }, { "epoch": 0.17, "learning_rate": 4.9169021106863886e-05, "loss": 1.1495, "step": 2000 }, { "epoch": 0.17, "eval_f1": 0.3303277428471857, "eval_loss": 1.19601309299469, "eval_runtime": 46.3042, "eval_samples_per_second": 139.987, "eval_steps_per_second": 8.768, "step": 2000 }, { "epoch": 0.21, "learning_rate": 4.8961276383579855e-05, "loss": 1.1406, "step": 2500 }, { "epoch": 0.21, "eval_f1": 0.33363478480748904, "eval_loss": 1.2050482034683228, "eval_runtime": 46.0365, "eval_samples_per_second": 140.801, "eval_steps_per_second": 8.819, "step": 2500 }, { "epoch": 0.25, "learning_rate": 4.875353166029583e-05, "loss": 1.1332, "step": 3000 }, { "epoch": 0.25, "eval_f1": 0.35687474820142895, "eval_loss": 1.1201504468917847, "eval_runtime": 45.6751, "eval_samples_per_second": 141.915, "eval_steps_per_second": 8.889, "step": 3000 }, { "epoch": 0.29, "learning_rate": 4.85457869370118e-05, "loss": 1.1007, "step": 3500 }, { "epoch": 0.29, "eval_f1": 0.3605293337176752, "eval_loss": 1.1953203678131104, "eval_runtime": 45.2576, "eval_samples_per_second": 143.225, "eval_steps_per_second": 8.971, "step": 3500 }, { "epoch": 0.33, "learning_rate": 4.833804221372778e-05, "loss": 1.1157, "step": 4000 }, { "epoch": 0.33, "eval_f1": 0.3862175225700333, "eval_loss": 1.1009345054626465, "eval_runtime": 46.0236, "eval_samples_per_second": 140.841, "eval_steps_per_second": 8.822, "step": 4000 }, { "epoch": 0.37, "learning_rate": 4.8130297490443746e-05, "loss": 1.1172, "step": 4500 }, { "epoch": 0.37, "eval_f1": 0.3819750143486552, "eval_loss": 1.126935362815857, "eval_runtime": 46.0628, "eval_samples_per_second": 140.721, "eval_steps_per_second": 8.814, "step": 4500 }, { "epoch": 0.42, "learning_rate": 4.7922552767159715e-05, "loss": 1.1041, "step": 5000 }, { "epoch": 0.42, "eval_f1": 0.29887014740598117, "eval_loss": 1.1668146848678589, "eval_runtime": 46.4347, "eval_samples_per_second": 139.594, "eval_steps_per_second": 8.743, "step": 5000 }, { "epoch": 0.46, "learning_rate": 4.7714808043875684e-05, "loss": 1.102, "step": 5500 }, { "epoch": 0.46, "eval_f1": 0.418642853322715, "eval_loss": 1.1066502332687378, "eval_runtime": 46.0383, "eval_samples_per_second": 140.796, "eval_steps_per_second": 8.819, "step": 5500 }, { "epoch": 0.5, "learning_rate": 4.750706332059166e-05, "loss": 1.0878, "step": 6000 }, { "epoch": 0.5, "eval_f1": 0.3200899667689199, "eval_loss": 1.1729530096054077, "eval_runtime": 46.8484, "eval_samples_per_second": 138.361, "eval_steps_per_second": 8.666, "step": 6000 }, { "epoch": 0.54, "learning_rate": 4.729931859730763e-05, "loss": 1.0866, "step": 6500 }, { "epoch": 0.54, "eval_f1": 0.38888205294003, "eval_loss": 1.108739972114563, "eval_runtime": 46.5428, "eval_samples_per_second": 139.27, "eval_steps_per_second": 8.723, "step": 6500 }, { "epoch": 0.58, "learning_rate": 4.7091573874023606e-05, "loss": 1.0729, "step": 7000 }, { "epoch": 0.58, "eval_f1": 0.335409883600229, "eval_loss": 1.1224578619003296, "eval_runtime": 46.3948, "eval_samples_per_second": 139.714, "eval_steps_per_second": 8.751, "step": 7000 }, { "epoch": 0.62, "learning_rate": 4.6883829150739575e-05, "loss": 1.0684, "step": 7500 }, { "epoch": 0.62, "eval_f1": 0.4087602196471012, "eval_loss": 1.1329175233840942, "eval_runtime": 46.5357, "eval_samples_per_second": 139.291, "eval_steps_per_second": 8.724, "step": 7500 }, { "epoch": 0.66, "learning_rate": 4.6676084427455544e-05, "loss": 1.0633, "step": 8000 }, { "epoch": 0.66, "eval_f1": 0.39803057267400827, "eval_loss": 1.1004011631011963, "eval_runtime": 46.1396, "eval_samples_per_second": 140.487, "eval_steps_per_second": 8.799, "step": 8000 }, { "epoch": 0.71, "learning_rate": 4.6468339704171513e-05, "loss": 1.0739, "step": 8500 }, { "epoch": 0.71, "eval_f1": 0.39430507046678914, "eval_loss": 1.090652585029602, "eval_runtime": 46.3294, "eval_samples_per_second": 139.911, "eval_steps_per_second": 8.763, "step": 8500 }, { "epoch": 0.75, "learning_rate": 4.626059498088749e-05, "loss": 1.0646, "step": 9000 }, { "epoch": 0.75, "eval_f1": 0.4204523467443143, "eval_loss": 1.1204614639282227, "eval_runtime": 46.2687, "eval_samples_per_second": 140.095, "eval_steps_per_second": 8.775, "step": 9000 }, { "epoch": 0.79, "learning_rate": 4.605285025760346e-05, "loss": 1.0581, "step": 9500 }, { "epoch": 0.79, "eval_f1": 0.3934072615715215, "eval_loss": 1.100487232208252, "eval_runtime": 46.7591, "eval_samples_per_second": 138.625, "eval_steps_per_second": 8.683, "step": 9500 }, { "epoch": 0.83, "learning_rate": 4.5845105534319435e-05, "loss": 1.0659, "step": 10000 }, { "epoch": 0.83, "eval_f1": 0.3959022659523447, "eval_loss": 1.0948997735977173, "eval_runtime": 46.4254, "eval_samples_per_second": 139.622, "eval_steps_per_second": 8.745, "step": 10000 }, { "epoch": 0.87, "learning_rate": 4.5637360811035404e-05, "loss": 1.0573, "step": 10500 }, { "epoch": 0.87, "eval_f1": 0.4038876072579517, "eval_loss": 1.0948611497879028, "eval_runtime": 46.9168, "eval_samples_per_second": 138.159, "eval_steps_per_second": 8.654, "step": 10500 }, { "epoch": 0.91, "learning_rate": 4.542961608775137e-05, "loss": 1.0725, "step": 11000 }, { "epoch": 0.91, "eval_f1": 0.3986235498061616, "eval_loss": 1.1076061725616455, "eval_runtime": 46.337, "eval_samples_per_second": 139.888, "eval_steps_per_second": 8.762, "step": 11000 }, { "epoch": 0.96, "learning_rate": 4.522187136446734e-05, "loss": 1.0453, "step": 11500 }, { "epoch": 0.96, "eval_f1": 0.39752123118599975, "eval_loss": 1.0838735103607178, "eval_runtime": 46.3441, "eval_samples_per_second": 139.867, "eval_steps_per_second": 8.761, "step": 11500 }, { "epoch": 1.0, "learning_rate": 4.501412664118332e-05, "loss": 1.0594, "step": 12000 }, { "epoch": 1.0, "eval_f1": 0.3901284493425606, "eval_loss": 1.0847594738006592, "eval_runtime": 45.6188, "eval_samples_per_second": 142.091, "eval_steps_per_second": 8.9, "step": 12000 }, { "epoch": 1.04, "learning_rate": 4.480638191789929e-05, "loss": 0.9487, "step": 12500 }, { "epoch": 1.04, "eval_f1": 0.43667878762469675, "eval_loss": 1.1431002616882324, "eval_runtime": 46.1871, "eval_samples_per_second": 140.342, "eval_steps_per_second": 8.79, "step": 12500 }, { "epoch": 1.08, "learning_rate": 4.4598637194615264e-05, "loss": 0.9704, "step": 13000 }, { "epoch": 1.08, "eval_f1": 0.39215933654644525, "eval_loss": 1.1027016639709473, "eval_runtime": 45.9637, "eval_samples_per_second": 141.024, "eval_steps_per_second": 8.833, "step": 13000 }, { "epoch": 1.12, "learning_rate": 4.439089247133123e-05, "loss": 0.9469, "step": 13500 }, { "epoch": 1.12, "eval_f1": 0.4076438023703015, "eval_loss": 1.1772775650024414, "eval_runtime": 45.9053, "eval_samples_per_second": 141.204, "eval_steps_per_second": 8.844, "step": 13500 }, { "epoch": 1.16, "learning_rate": 4.41831477480472e-05, "loss": 0.9325, "step": 14000 }, { "epoch": 1.16, "eval_f1": 0.4386204591653561, "eval_loss": 1.1739530563354492, "eval_runtime": 45.6601, "eval_samples_per_second": 141.962, "eval_steps_per_second": 8.892, "step": 14000 }, { "epoch": 1.2, "learning_rate": 4.397540302476317e-05, "loss": 0.9393, "step": 14500 }, { "epoch": 1.2, "eval_f1": 0.4275788122291597, "eval_loss": 1.1776121854782104, "eval_runtime": 45.9594, "eval_samples_per_second": 141.037, "eval_steps_per_second": 8.834, "step": 14500 }, { "epoch": 1.25, "learning_rate": 4.376765830147914e-05, "loss": 0.9358, "step": 15000 }, { "epoch": 1.25, "eval_f1": 0.402493107642323, "eval_loss": 1.1454391479492188, "eval_runtime": 46.4333, "eval_samples_per_second": 139.598, "eval_steps_per_second": 8.744, "step": 15000 }, { "epoch": 1.29, "learning_rate": 4.355991357819512e-05, "loss": 0.9276, "step": 15500 }, { "epoch": 1.29, "eval_f1": 0.4309881707776124, "eval_loss": 1.1370099782943726, "eval_runtime": 46.0386, "eval_samples_per_second": 140.795, "eval_steps_per_second": 8.819, "step": 15500 }, { "epoch": 1.33, "learning_rate": 4.3352168854911086e-05, "loss": 0.9749, "step": 16000 }, { "epoch": 1.33, "eval_f1": 0.42666296909338014, "eval_loss": 1.147721290588379, "eval_runtime": 46.5256, "eval_samples_per_second": 139.321, "eval_steps_per_second": 8.726, "step": 16000 }, { "epoch": 1.37, "learning_rate": 4.314442413162706e-05, "loss": 0.9584, "step": 16500 }, { "epoch": 1.37, "eval_f1": 0.3917665752135426, "eval_loss": 1.1466563940048218, "eval_runtime": 45.7559, "eval_samples_per_second": 141.665, "eval_steps_per_second": 8.873, "step": 16500 }, { "epoch": 1.41, "learning_rate": 4.293667940834303e-05, "loss": 0.9458, "step": 17000 }, { "epoch": 1.41, "eval_f1": 0.38855363975832957, "eval_loss": 1.1946083307266235, "eval_runtime": 46.3569, "eval_samples_per_second": 139.828, "eval_steps_per_second": 8.758, "step": 17000 }, { "epoch": 1.45, "learning_rate": 4.2728934685059e-05, "loss": 0.9615, "step": 17500 }, { "epoch": 1.45, "eval_f1": 0.4006260567973624, "eval_loss": 1.1700124740600586, "eval_runtime": 46.2179, "eval_samples_per_second": 140.249, "eval_steps_per_second": 8.784, "step": 17500 }, { "epoch": 1.5, "learning_rate": 4.252118996177497e-05, "loss": 0.949, "step": 18000 }, { "epoch": 1.5, "eval_f1": 0.39774908999391234, "eval_loss": 1.1761705875396729, "eval_runtime": 46.6777, "eval_samples_per_second": 138.867, "eval_steps_per_second": 8.698, "step": 18000 }, { "epoch": 1.54, "learning_rate": 4.2313445238490946e-05, "loss": 0.9424, "step": 18500 }, { "epoch": 1.54, "eval_f1": 0.4174117585364745, "eval_loss": 1.165438175201416, "eval_runtime": 45.4426, "eval_samples_per_second": 142.641, "eval_steps_per_second": 8.934, "step": 18500 }, { "epoch": 1.58, "learning_rate": 4.2105700515206915e-05, "loss": 0.947, "step": 19000 }, { "epoch": 1.58, "eval_f1": 0.3867006123407769, "eval_loss": 1.1531673669815063, "eval_runtime": 46.295, "eval_samples_per_second": 140.015, "eval_steps_per_second": 8.77, "step": 19000 }, { "epoch": 1.62, "learning_rate": 4.189795579192289e-05, "loss": 0.938, "step": 19500 }, { "epoch": 1.62, "eval_f1": 0.44235664423629284, "eval_loss": 1.1730421781539917, "eval_runtime": 45.858, "eval_samples_per_second": 141.349, "eval_steps_per_second": 8.853, "step": 19500 }, { "epoch": 1.66, "learning_rate": 4.1690211068638854e-05, "loss": 0.9486, "step": 20000 }, { "epoch": 1.66, "eval_f1": 0.41240105931665383, "eval_loss": 1.1419258117675781, "eval_runtime": 46.2869, "eval_samples_per_second": 140.04, "eval_steps_per_second": 8.771, "step": 20000 }, { "epoch": 1.7, "learning_rate": 4.148246634535483e-05, "loss": 0.9464, "step": 20500 }, { "epoch": 1.7, "eval_f1": 0.4092001043387523, "eval_loss": 1.2019739151000977, "eval_runtime": 46.6507, "eval_samples_per_second": 138.948, "eval_steps_per_second": 8.703, "step": 20500 }, { "epoch": 1.75, "learning_rate": 4.12747216220708e-05, "loss": 0.933, "step": 21000 }, { "epoch": 1.75, "eval_f1": 0.4176306552081639, "eval_loss": 1.1400264501571655, "eval_runtime": 46.9566, "eval_samples_per_second": 138.042, "eval_steps_per_second": 8.646, "step": 21000 }, { "epoch": 1.79, "learning_rate": 4.1066976898786775e-05, "loss": 0.9544, "step": 21500 }, { "epoch": 1.79, "eval_f1": 0.430552427353769, "eval_loss": 1.1604799032211304, "eval_runtime": 47.2589, "eval_samples_per_second": 137.159, "eval_steps_per_second": 8.591, "step": 21500 }, { "epoch": 1.83, "learning_rate": 4.0859232175502744e-05, "loss": 0.9312, "step": 22000 }, { "epoch": 1.83, "eval_f1": 0.39462194466074857, "eval_loss": 1.1545989513397217, "eval_runtime": 46.5344, "eval_samples_per_second": 139.295, "eval_steps_per_second": 8.725, "step": 22000 }, { "epoch": 1.87, "learning_rate": 4.065148745221872e-05, "loss": 0.9458, "step": 22500 }, { "epoch": 1.87, "eval_f1": 0.44503977528303557, "eval_loss": 1.1579736471176147, "eval_runtime": 47.0732, "eval_samples_per_second": 137.7, "eval_steps_per_second": 8.625, "step": 22500 }, { "epoch": 1.91, "learning_rate": 4.044374272893468e-05, "loss": 0.9463, "step": 23000 }, { "epoch": 1.91, "eval_f1": 0.41566594457572864, "eval_loss": 1.1216946840286255, "eval_runtime": 46.6125, "eval_samples_per_second": 139.061, "eval_steps_per_second": 8.71, "step": 23000 }, { "epoch": 1.95, "learning_rate": 4.023599800565066e-05, "loss": 0.9292, "step": 23500 }, { "epoch": 1.95, "eval_f1": 0.3965530806436371, "eval_loss": 1.1553888320922852, "eval_runtime": 46.7596, "eval_samples_per_second": 138.624, "eval_steps_per_second": 8.683, "step": 23500 }, { "epoch": 1.99, "learning_rate": 4.002825328236663e-05, "loss": 0.9286, "step": 24000 }, { "epoch": 1.99, "eval_f1": 0.4010412760625175, "eval_loss": 1.1262454986572266, "eval_runtime": 46.3591, "eval_samples_per_second": 139.822, "eval_steps_per_second": 8.758, "step": 24000 }, { "epoch": 2.04, "learning_rate": 3.9820508559082604e-05, "loss": 0.7944, "step": 24500 }, { "epoch": 2.04, "eval_f1": 0.43825195451845356, "eval_loss": 1.2961622476577759, "eval_runtime": 46.8176, "eval_samples_per_second": 138.452, "eval_steps_per_second": 8.672, "step": 24500 }, { "epoch": 2.08, "learning_rate": 3.961276383579857e-05, "loss": 0.8003, "step": 25000 }, { "epoch": 2.08, "eval_f1": 0.4380376882658572, "eval_loss": 1.268869161605835, "eval_runtime": 52.7668, "eval_samples_per_second": 122.842, "eval_steps_per_second": 7.694, "step": 25000 }, { "epoch": 2.12, "learning_rate": 3.940501911251455e-05, "loss": 0.7792, "step": 25500 }, { "epoch": 2.12, "eval_f1": 0.4432590805872725, "eval_loss": 1.2123405933380127, "eval_runtime": 46.7198, "eval_samples_per_second": 138.742, "eval_steps_per_second": 8.69, "step": 25500 }, { "epoch": 2.16, "learning_rate": 3.919727438923051e-05, "loss": 0.79, "step": 26000 }, { "epoch": 2.16, "eval_f1": 0.44452383932580275, "eval_loss": 1.2517160177230835, "eval_runtime": 46.1817, "eval_samples_per_second": 140.359, "eval_steps_per_second": 8.791, "step": 26000 }, { "epoch": 2.2, "learning_rate": 3.898952966594649e-05, "loss": 0.7984, "step": 26500 }, { "epoch": 2.2, "eval_f1": 0.4221078291607028, "eval_loss": 1.2184810638427734, "eval_runtime": 46.3545, "eval_samples_per_second": 139.835, "eval_steps_per_second": 8.759, "step": 26500 }, { "epoch": 2.24, "learning_rate": 3.878178494266246e-05, "loss": 0.7952, "step": 27000 }, { "epoch": 2.24, "eval_f1": 0.439579966107181, "eval_loss": 1.2801449298858643, "eval_runtime": 46.2023, "eval_samples_per_second": 140.296, "eval_steps_per_second": 8.787, "step": 27000 }, { "epoch": 2.29, "learning_rate": 3.857404021937843e-05, "loss": 0.8055, "step": 27500 }, { "epoch": 2.29, "eval_f1": 0.4544774942196613, "eval_loss": 1.2639812231063843, "eval_runtime": 46.0512, "eval_samples_per_second": 140.757, "eval_steps_per_second": 8.816, "step": 27500 }, { "epoch": 2.33, "learning_rate": 3.83662954960944e-05, "loss": 0.8084, "step": 28000 }, { "epoch": 2.33, "eval_f1": 0.44333487259913823, "eval_loss": 1.228055477142334, "eval_runtime": 46.2747, "eval_samples_per_second": 140.077, "eval_steps_per_second": 8.774, "step": 28000 }, { "epoch": 2.37, "learning_rate": 3.815855077281037e-05, "loss": 0.7904, "step": 28500 }, { "epoch": 2.37, "eval_f1": 0.43992159119888363, "eval_loss": 1.2493727207183838, "eval_runtime": 46.321, "eval_samples_per_second": 139.937, "eval_steps_per_second": 8.765, "step": 28500 }, { "epoch": 2.41, "learning_rate": 3.795080604952634e-05, "loss": 0.8057, "step": 29000 }, { "epoch": 2.41, "eval_f1": 0.4114745835354577, "eval_loss": 1.2447556257247925, "eval_runtime": 46.3954, "eval_samples_per_second": 139.712, "eval_steps_per_second": 8.751, "step": 29000 }, { "epoch": 2.45, "learning_rate": 3.774306132624231e-05, "loss": 0.8001, "step": 29500 }, { "epoch": 2.45, "eval_f1": 0.41230473935117545, "eval_loss": 1.2784521579742432, "eval_runtime": 46.222, "eval_samples_per_second": 140.236, "eval_steps_per_second": 8.784, "step": 29500 }, { "epoch": 2.49, "learning_rate": 3.7535316602958286e-05, "loss": 0.8293, "step": 30000 }, { "epoch": 2.49, "eval_f1": 0.4303954902219652, "eval_loss": 1.1889426708221436, "eval_runtime": 45.554, "eval_samples_per_second": 142.293, "eval_steps_per_second": 8.913, "step": 30000 }, { "epoch": 2.53, "learning_rate": 3.7327571879674255e-05, "loss": 0.8194, "step": 30500 }, { "epoch": 2.53, "eval_f1": 0.4301727119748369, "eval_loss": 1.2015577554702759, "eval_runtime": 46.1992, "eval_samples_per_second": 140.305, "eval_steps_per_second": 8.788, "step": 30500 }, { "epoch": 2.58, "learning_rate": 3.711982715639023e-05, "loss": 0.8028, "step": 31000 }, { "epoch": 2.58, "eval_f1": 0.44964098289977084, "eval_loss": 1.2026257514953613, "eval_runtime": 45.6586, "eval_samples_per_second": 141.967, "eval_steps_per_second": 8.892, "step": 31000 }, { "epoch": 2.62, "learning_rate": 3.69120824331062e-05, "loss": 0.8123, "step": 31500 }, { "epoch": 2.62, "eval_f1": 0.4305026988222712, "eval_loss": 1.2430651187896729, "eval_runtime": 45.6789, "eval_samples_per_second": 141.904, "eval_steps_per_second": 8.888, "step": 31500 }, { "epoch": 2.66, "learning_rate": 3.670433770982217e-05, "loss": 0.7941, "step": 32000 }, { "epoch": 2.66, "eval_f1": 0.4185262770510854, "eval_loss": 1.2300126552581787, "eval_runtime": 46.2407, "eval_samples_per_second": 140.179, "eval_steps_per_second": 8.78, "step": 32000 }, { "epoch": 2.7, "learning_rate": 3.649659298653814e-05, "loss": 0.7815, "step": 32500 }, { "epoch": 2.7, "eval_f1": 0.42807790167507703, "eval_loss": 1.3011759519577026, "eval_runtime": 45.2153, "eval_samples_per_second": 143.358, "eval_steps_per_second": 8.979, "step": 32500 }, { "epoch": 2.74, "learning_rate": 3.6288848263254115e-05, "loss": 0.8081, "step": 33000 }, { "epoch": 2.74, "eval_f1": 0.440687373165412, "eval_loss": 1.253546953201294, "eval_runtime": 45.7395, "eval_samples_per_second": 141.715, "eval_steps_per_second": 8.876, "step": 33000 }, { "epoch": 2.78, "learning_rate": 3.6081103539970084e-05, "loss": 0.8086, "step": 33500 }, { "epoch": 2.78, "eval_f1": 0.44928394998593935, "eval_loss": 1.2568650245666504, "eval_runtime": 45.7963, "eval_samples_per_second": 141.54, "eval_steps_per_second": 8.865, "step": 33500 }, { "epoch": 2.83, "learning_rate": 3.587335881668606e-05, "loss": 0.7858, "step": 34000 }, { "epoch": 2.83, "eval_f1": 0.43231367666538134, "eval_loss": 1.2376387119293213, "eval_runtime": 46.5395, "eval_samples_per_second": 139.279, "eval_steps_per_second": 8.724, "step": 34000 }, { "epoch": 2.87, "learning_rate": 3.566561409340203e-05, "loss": 0.8065, "step": 34500 }, { "epoch": 2.87, "eval_f1": 0.42028961729158787, "eval_loss": 1.2222144603729248, "eval_runtime": 46.2907, "eval_samples_per_second": 140.028, "eval_steps_per_second": 8.771, "step": 34500 }, { "epoch": 2.91, "learning_rate": 3.5457869370118e-05, "loss": 0.7991, "step": 35000 }, { "epoch": 2.91, "eval_f1": 0.4231707776654273, "eval_loss": 1.250239372253418, "eval_runtime": 45.795, "eval_samples_per_second": 141.544, "eval_steps_per_second": 8.866, "step": 35000 }, { "epoch": 2.95, "learning_rate": 3.525012464683397e-05, "loss": 0.816, "step": 35500 }, { "epoch": 2.95, "eval_f1": 0.4231928313001403, "eval_loss": 1.2436952590942383, "eval_runtime": 45.9669, "eval_samples_per_second": 141.014, "eval_steps_per_second": 8.832, "step": 35500 }, { "epoch": 2.99, "learning_rate": 3.5042379923549944e-05, "loss": 0.8093, "step": 36000 }, { "epoch": 2.99, "eval_f1": 0.40722455645687655, "eval_loss": 1.1901623010635376, "eval_runtime": 45.8966, "eval_samples_per_second": 141.231, "eval_steps_per_second": 8.846, "step": 36000 }, { "epoch": 3.03, "learning_rate": 3.483463520026591e-05, "loss": 0.6567, "step": 36500 }, { "epoch": 3.03, "eval_f1": 0.4358392298680451, "eval_loss": 1.4940779209136963, "eval_runtime": 46.3646, "eval_samples_per_second": 139.805, "eval_steps_per_second": 8.757, "step": 36500 }, { "epoch": 3.07, "learning_rate": 3.462689047698189e-05, "loss": 0.6304, "step": 37000 }, { "epoch": 3.07, "eval_f1": 0.4248894510967205, "eval_loss": 1.471817135810852, "eval_runtime": 46.8512, "eval_samples_per_second": 138.353, "eval_steps_per_second": 8.666, "step": 37000 }, { "epoch": 3.12, "learning_rate": 3.441914575369786e-05, "loss": 0.6454, "step": 37500 }, { "epoch": 3.12, "eval_f1": 0.427949493239817, "eval_loss": 1.484312891960144, "eval_runtime": 46.3345, "eval_samples_per_second": 139.896, "eval_steps_per_second": 8.762, "step": 37500 }, { "epoch": 3.16, "learning_rate": 3.421140103041383e-05, "loss": 0.6654, "step": 38000 }, { "epoch": 3.16, "eval_f1": 0.43504779862979137, "eval_loss": 1.4933797121047974, "eval_runtime": 45.9895, "eval_samples_per_second": 140.945, "eval_steps_per_second": 8.828, "step": 38000 }, { "epoch": 3.2, "learning_rate": 3.40036563071298e-05, "loss": 0.6478, "step": 38500 }, { "epoch": 3.2, "eval_f1": 0.4310701120380386, "eval_loss": 1.4152840375900269, "eval_runtime": 46.2675, "eval_samples_per_second": 140.098, "eval_steps_per_second": 8.775, "step": 38500 }, { "epoch": 3.24, "learning_rate": 3.379591158384577e-05, "loss": 0.637, "step": 39000 }, { "epoch": 3.24, "eval_f1": 0.44398856573642115, "eval_loss": 1.3994076251983643, "eval_runtime": 45.8114, "eval_samples_per_second": 141.493, "eval_steps_per_second": 8.862, "step": 39000 }, { "epoch": 3.28, "learning_rate": 3.358816686056174e-05, "loss": 0.6398, "step": 39500 }, { "epoch": 3.28, "eval_f1": 0.42669623237681525, "eval_loss": 1.5294607877731323, "eval_runtime": 46.4175, "eval_samples_per_second": 139.646, "eval_steps_per_second": 8.747, "step": 39500 }, { "epoch": 3.32, "learning_rate": 3.338042213727772e-05, "loss": 0.6703, "step": 40000 }, { "epoch": 3.32, "eval_f1": 0.4287387139396017, "eval_loss": 1.3941184282302856, "eval_runtime": 46.3839, "eval_samples_per_second": 139.747, "eval_steps_per_second": 8.753, "step": 40000 }, { "epoch": 3.37, "learning_rate": 3.317267741399369e-05, "loss": 0.6442, "step": 40500 }, { "epoch": 3.37, "eval_f1": 0.4371709367951578, "eval_loss": 1.388688564300537, "eval_runtime": 46.5417, "eval_samples_per_second": 139.273, "eval_steps_per_second": 8.723, "step": 40500 }, { "epoch": 3.41, "learning_rate": 3.296493269070966e-05, "loss": 0.6784, "step": 41000 }, { "epoch": 3.41, "eval_f1": 0.43751727834027077, "eval_loss": 1.3877114057540894, "eval_runtime": 45.9117, "eval_samples_per_second": 141.184, "eval_steps_per_second": 8.843, "step": 41000 }, { "epoch": 3.45, "learning_rate": 3.2757187967425626e-05, "loss": 0.6614, "step": 41500 }, { "epoch": 3.45, "eval_f1": 0.4372541317801921, "eval_loss": 1.4126884937286377, "eval_runtime": 46.4825, "eval_samples_per_second": 139.45, "eval_steps_per_second": 8.734, "step": 41500 }, { "epoch": 3.49, "learning_rate": 3.25494432441416e-05, "loss": 0.6864, "step": 42000 }, { "epoch": 3.49, "eval_f1": 0.42774626956090644, "eval_loss": 1.4882549047470093, "eval_runtime": 46.0334, "eval_samples_per_second": 140.811, "eval_steps_per_second": 8.82, "step": 42000 }, { "epoch": 3.53, "learning_rate": 3.234169852085757e-05, "loss": 0.6636, "step": 42500 }, { "epoch": 3.53, "eval_f1": 0.4248446501416097, "eval_loss": 1.3951656818389893, "eval_runtime": 45.7857, "eval_samples_per_second": 141.573, "eval_steps_per_second": 8.867, "step": 42500 }, { "epoch": 3.57, "learning_rate": 3.213395379757354e-05, "loss": 0.6801, "step": 43000 }, { "epoch": 3.57, "eval_f1": 0.41563433666965854, "eval_loss": 1.4469736814498901, "eval_runtime": 46.1733, "eval_samples_per_second": 140.384, "eval_steps_per_second": 8.793, "step": 43000 }, { "epoch": 3.61, "learning_rate": 3.1926209074289517e-05, "loss": 0.6509, "step": 43500 }, { "epoch": 3.61, "eval_f1": 0.42981854634578975, "eval_loss": 1.3635119199752808, "eval_runtime": 46.4023, "eval_samples_per_second": 139.691, "eval_steps_per_second": 8.75, "step": 43500 }, { "epoch": 3.66, "learning_rate": 3.1718464351005486e-05, "loss": 0.6776, "step": 44000 }, { "epoch": 3.66, "eval_f1": 0.4345867863354599, "eval_loss": 1.3212920427322388, "eval_runtime": 46.0513, "eval_samples_per_second": 140.756, "eval_steps_per_second": 8.816, "step": 44000 }, { "epoch": 3.7, "learning_rate": 3.1510719627721455e-05, "loss": 0.6686, "step": 44500 }, { "epoch": 3.7, "eval_f1": 0.4283615179772263, "eval_loss": 1.3529335260391235, "eval_runtime": 45.8914, "eval_samples_per_second": 141.247, "eval_steps_per_second": 8.847, "step": 44500 }, { "epoch": 3.74, "learning_rate": 3.1302974904437424e-05, "loss": 0.6696, "step": 45000 }, { "epoch": 3.74, "eval_f1": 0.42784477364430307, "eval_loss": 1.3639956712722778, "eval_runtime": 45.6293, "eval_samples_per_second": 142.058, "eval_steps_per_second": 8.898, "step": 45000 }, { "epoch": 3.78, "learning_rate": 3.10952301811534e-05, "loss": 0.6624, "step": 45500 }, { "epoch": 3.78, "eval_f1": 0.4325377081208613, "eval_loss": 1.409765601158142, "eval_runtime": 47.4821, "eval_samples_per_second": 136.515, "eval_steps_per_second": 8.551, "step": 45500 }, { "epoch": 3.82, "learning_rate": 3.088748545786937e-05, "loss": 0.6876, "step": 46000 }, { "epoch": 3.82, "eval_f1": 0.4282077409143604, "eval_loss": 1.4017492532730103, "eval_runtime": 46.1534, "eval_samples_per_second": 140.445, "eval_steps_per_second": 8.797, "step": 46000 }, { "epoch": 3.86, "learning_rate": 3.0679740734585346e-05, "loss": 0.671, "step": 46500 }, { "epoch": 3.86, "eval_f1": 0.4172019162341494, "eval_loss": 1.3028308153152466, "eval_runtime": 46.2659, "eval_samples_per_second": 140.103, "eval_steps_per_second": 8.775, "step": 46500 }, { "epoch": 3.91, "learning_rate": 3.047199601130131e-05, "loss": 0.68, "step": 47000 }, { "epoch": 3.91, "eval_f1": 0.43831774420843844, "eval_loss": 1.3964955806732178, "eval_runtime": 45.9895, "eval_samples_per_second": 140.945, "eval_steps_per_second": 8.828, "step": 47000 }, { "epoch": 3.95, "learning_rate": 3.0264251288017287e-05, "loss": 0.6715, "step": 47500 }, { "epoch": 3.95, "eval_f1": 0.4301624961844753, "eval_loss": 1.3572640419006348, "eval_runtime": 45.9503, "eval_samples_per_second": 141.065, "eval_steps_per_second": 8.836, "step": 47500 }, { "epoch": 3.99, "learning_rate": 3.0056506564733257e-05, "loss": 0.697, "step": 48000 }, { "epoch": 3.99, "eval_f1": 0.4253265083713992, "eval_loss": 1.3642019033432007, "eval_runtime": 46.1149, "eval_samples_per_second": 140.562, "eval_steps_per_second": 8.804, "step": 48000 }, { "epoch": 4.03, "learning_rate": 2.984876184144923e-05, "loss": 0.5631, "step": 48500 }, { "epoch": 4.03, "eval_f1": 0.433265159904986, "eval_loss": 1.5990760326385498, "eval_runtime": 45.3111, "eval_samples_per_second": 143.055, "eval_steps_per_second": 8.96, "step": 48500 }, { "epoch": 4.07, "learning_rate": 2.96410171181652e-05, "loss": 0.5151, "step": 49000 }, { "epoch": 4.07, "eval_f1": 0.4295485993747164, "eval_loss": 1.6384857892990112, "eval_runtime": 46.4055, "eval_samples_per_second": 139.682, "eval_steps_per_second": 8.749, "step": 49000 }, { "epoch": 4.11, "learning_rate": 2.943327239488117e-05, "loss": 0.5348, "step": 49500 }, { "epoch": 4.11, "eval_f1": 0.4240371984001696, "eval_loss": 1.5903598070144653, "eval_runtime": 45.9488, "eval_samples_per_second": 141.07, "eval_steps_per_second": 8.836, "step": 49500 }, { "epoch": 4.15, "learning_rate": 2.922552767159714e-05, "loss": 0.5288, "step": 50000 }, { "epoch": 4.15, "eval_f1": 0.42810904253980414, "eval_loss": 1.6144169569015503, "eval_runtime": 46.451, "eval_samples_per_second": 139.545, "eval_steps_per_second": 8.74, "step": 50000 }, { "epoch": 4.2, "learning_rate": 2.9017782948313117e-05, "loss": 0.5422, "step": 50500 }, { "epoch": 4.2, "eval_f1": 0.4302607705379434, "eval_loss": 1.7097866535186768, "eval_runtime": 46.1436, "eval_samples_per_second": 140.475, "eval_steps_per_second": 8.799, "step": 50500 }, { "epoch": 4.24, "learning_rate": 2.8810038225029086e-05, "loss": 0.548, "step": 51000 }, { "epoch": 4.24, "eval_f1": 0.4281896068096123, "eval_loss": 1.573617935180664, "eval_runtime": 46.7725, "eval_samples_per_second": 138.586, "eval_steps_per_second": 8.68, "step": 51000 }, { "epoch": 4.28, "learning_rate": 2.860229350174506e-05, "loss": 0.5169, "step": 51500 }, { "epoch": 4.28, "eval_f1": 0.4336268684648746, "eval_loss": 1.5596050024032593, "eval_runtime": 45.9526, "eval_samples_per_second": 141.058, "eval_steps_per_second": 8.835, "step": 51500 }, { "epoch": 4.32, "learning_rate": 2.8394548778461028e-05, "loss": 0.5327, "step": 52000 }, { "epoch": 4.32, "eval_f1": 0.4223419321700975, "eval_loss": 1.6166974306106567, "eval_runtime": 46.03, "eval_samples_per_second": 140.821, "eval_steps_per_second": 8.82, "step": 52000 }, { "epoch": 4.36, "learning_rate": 2.8186804055177e-05, "loss": 0.5343, "step": 52500 }, { "epoch": 4.36, "eval_f1": 0.4428612616903695, "eval_loss": 1.7605165243148804, "eval_runtime": 45.6649, "eval_samples_per_second": 141.947, "eval_steps_per_second": 8.891, "step": 52500 }, { "epoch": 4.4, "learning_rate": 2.797905933189297e-05, "loss": 0.5478, "step": 53000 }, { "epoch": 4.4, "eval_f1": 0.44071411652178355, "eval_loss": 1.6004695892333984, "eval_runtime": 45.993, "eval_samples_per_second": 140.935, "eval_steps_per_second": 8.827, "step": 53000 }, { "epoch": 4.45, "learning_rate": 2.7771314608608946e-05, "loss": 0.5489, "step": 53500 }, { "epoch": 4.45, "eval_f1": 0.44084931155643786, "eval_loss": 1.642219066619873, "eval_runtime": 45.869, "eval_samples_per_second": 141.316, "eval_steps_per_second": 8.851, "step": 53500 }, { "epoch": 4.49, "learning_rate": 2.7563569885324915e-05, "loss": 0.5388, "step": 54000 }, { "epoch": 4.49, "eval_f1": 0.4372812211041268, "eval_loss": 1.7352898120880127, "eval_runtime": 46.4558, "eval_samples_per_second": 139.53, "eval_steps_per_second": 8.739, "step": 54000 }, { "epoch": 4.53, "learning_rate": 2.7355825162040887e-05, "loss": 0.5312, "step": 54500 }, { "epoch": 4.53, "eval_f1": 0.42873515159285114, "eval_loss": 1.6332671642303467, "eval_runtime": 46.1955, "eval_samples_per_second": 140.317, "eval_steps_per_second": 8.789, "step": 54500 }, { "epoch": 4.57, "learning_rate": 2.7148080438756857e-05, "loss": 0.5369, "step": 55000 }, { "epoch": 4.57, "eval_f1": 0.4392093355525315, "eval_loss": 1.5618759393692017, "eval_runtime": 47.5099, "eval_samples_per_second": 136.435, "eval_steps_per_second": 8.546, "step": 55000 }, { "epoch": 4.61, "learning_rate": 2.694033571547283e-05, "loss": 0.5475, "step": 55500 }, { "epoch": 4.61, "eval_f1": 0.42820616668057093, "eval_loss": 1.583003282546997, "eval_runtime": 46.2808, "eval_samples_per_second": 140.058, "eval_steps_per_second": 8.773, "step": 55500 }, { "epoch": 4.65, "learning_rate": 2.67325909921888e-05, "loss": 0.5622, "step": 56000 }, { "epoch": 4.65, "eval_f1": 0.4405394714296675, "eval_loss": 1.5289151668548584, "eval_runtime": 46.2454, "eval_samples_per_second": 140.165, "eval_steps_per_second": 8.779, "step": 56000 }, { "epoch": 4.7, "learning_rate": 2.6524846268904768e-05, "loss": 0.5662, "step": 56500 }, { "epoch": 4.7, "eval_f1": 0.43493209599396276, "eval_loss": 1.5689671039581299, "eval_runtime": 46.2429, "eval_samples_per_second": 140.173, "eval_steps_per_second": 8.78, "step": 56500 }, { "epoch": 4.74, "learning_rate": 2.6317101545620744e-05, "loss": 0.5373, "step": 57000 }, { "epoch": 4.74, "eval_f1": 0.4302946240864107, "eval_loss": 1.6275018453598022, "eval_runtime": 46.0239, "eval_samples_per_second": 140.84, "eval_steps_per_second": 8.821, "step": 57000 }, { "epoch": 4.78, "learning_rate": 2.610935682233671e-05, "loss": 0.5584, "step": 57500 }, { "epoch": 4.78, "eval_f1": 0.43485188110217615, "eval_loss": 1.7044297456741333, "eval_runtime": 46.2954, "eval_samples_per_second": 140.014, "eval_steps_per_second": 8.77, "step": 57500 }, { "epoch": 4.82, "learning_rate": 2.5901612099052686e-05, "loss": 0.5484, "step": 58000 }, { "epoch": 4.82, "eval_f1": 0.43712471779629325, "eval_loss": 1.6315213441848755, "eval_runtime": 46.0252, "eval_samples_per_second": 140.836, "eval_steps_per_second": 8.821, "step": 58000 }, { "epoch": 4.86, "learning_rate": 2.5693867375768655e-05, "loss": 0.5475, "step": 58500 }, { "epoch": 4.86, "eval_f1": 0.446221258616921, "eval_loss": 1.5129351615905762, "eval_runtime": 46.4009, "eval_samples_per_second": 139.696, "eval_steps_per_second": 8.75, "step": 58500 }, { "epoch": 4.9, "learning_rate": 2.5486122652484628e-05, "loss": 0.5551, "step": 59000 }, { "epoch": 4.9, "eval_f1": 0.4409313233994151, "eval_loss": 1.637054204940796, "eval_runtime": 46.0061, "eval_samples_per_second": 140.894, "eval_steps_per_second": 8.825, "step": 59000 }, { "epoch": 4.94, "learning_rate": 2.5278377929200597e-05, "loss": 0.558, "step": 59500 }, { "epoch": 4.94, "eval_f1": 0.4335536795951597, "eval_loss": 1.5173062086105347, "eval_runtime": 46.0786, "eval_samples_per_second": 140.673, "eval_steps_per_second": 8.811, "step": 59500 }, { "epoch": 4.99, "learning_rate": 2.5070633205916573e-05, "loss": 0.5553, "step": 60000 }, { "epoch": 4.99, "eval_f1": 0.4349357425201839, "eval_loss": 1.564207911491394, "eval_runtime": 45.533, "eval_samples_per_second": 142.358, "eval_steps_per_second": 8.917, "step": 60000 }, { "epoch": 5.03, "learning_rate": 2.4862888482632542e-05, "loss": 0.4491, "step": 60500 }, { "epoch": 5.03, "eval_f1": 0.43614659724704335, "eval_loss": 1.9311244487762451, "eval_runtime": 46.1968, "eval_samples_per_second": 140.313, "eval_steps_per_second": 8.788, "step": 60500 }, { "epoch": 5.07, "learning_rate": 2.4655143759348515e-05, "loss": 0.403, "step": 61000 }, { "epoch": 5.07, "eval_f1": 0.4393254273429778, "eval_loss": 2.0766100883483887, "eval_runtime": 45.9492, "eval_samples_per_second": 141.069, "eval_steps_per_second": 8.836, "step": 61000 }, { "epoch": 5.11, "learning_rate": 2.4447399036064487e-05, "loss": 0.4233, "step": 61500 }, { "epoch": 5.11, "eval_f1": 0.43421874747799094, "eval_loss": 2.0252885818481445, "eval_runtime": 46.6362, "eval_samples_per_second": 138.991, "eval_steps_per_second": 8.706, "step": 61500 }, { "epoch": 5.15, "learning_rate": 2.4239654312780457e-05, "loss": 0.4412, "step": 62000 }, { "epoch": 5.15, "eval_f1": 0.43655258613920295, "eval_loss": 2.0584676265716553, "eval_runtime": 46.1023, "eval_samples_per_second": 140.6, "eval_steps_per_second": 8.807, "step": 62000 }, { "epoch": 5.19, "learning_rate": 2.403190958949643e-05, "loss": 0.4477, "step": 62500 }, { "epoch": 5.19, "eval_f1": 0.445008636219538, "eval_loss": 1.9808226823806763, "eval_runtime": 46.3962, "eval_samples_per_second": 139.71, "eval_steps_per_second": 8.751, "step": 62500 }, { "epoch": 5.24, "learning_rate": 2.3824164866212402e-05, "loss": 0.4497, "step": 63000 }, { "epoch": 5.24, "eval_f1": 0.44325755380966075, "eval_loss": 1.8606414794921875, "eval_runtime": 46.1573, "eval_samples_per_second": 140.433, "eval_steps_per_second": 8.796, "step": 63000 }, { "epoch": 5.28, "learning_rate": 2.361642014292837e-05, "loss": 0.4415, "step": 63500 }, { "epoch": 5.28, "eval_f1": 0.44300106609021345, "eval_loss": 2.060542106628418, "eval_runtime": 45.615, "eval_samples_per_second": 142.102, "eval_steps_per_second": 8.901, "step": 63500 }, { "epoch": 5.32, "learning_rate": 2.340867541964434e-05, "loss": 0.4655, "step": 64000 }, { "epoch": 5.32, "eval_f1": 0.4281324149819077, "eval_loss": 1.7479959726333618, "eval_runtime": 45.9941, "eval_samples_per_second": 140.931, "eval_steps_per_second": 8.827, "step": 64000 }, { "epoch": 5.36, "learning_rate": 2.3200930696360313e-05, "loss": 0.4395, "step": 64500 }, { "epoch": 5.36, "eval_f1": 0.4315119731593183, "eval_loss": 1.851706862449646, "eval_runtime": 45.6959, "eval_samples_per_second": 141.851, "eval_steps_per_second": 8.885, "step": 64500 }, { "epoch": 5.4, "learning_rate": 2.2993185973076282e-05, "loss": 0.4738, "step": 65000 }, { "epoch": 5.4, "eval_f1": 0.4246487511252701, "eval_loss": 1.7510011196136475, "eval_runtime": 46.0796, "eval_samples_per_second": 140.67, "eval_steps_per_second": 8.811, "step": 65000 }, { "epoch": 5.44, "learning_rate": 2.2785441249792255e-05, "loss": 0.455, "step": 65500 }, { "epoch": 5.44, "eval_f1": 0.42367965460597234, "eval_loss": 1.7951207160949707, "eval_runtime": 46.1571, "eval_samples_per_second": 140.433, "eval_steps_per_second": 8.796, "step": 65500 }, { "epoch": 5.48, "learning_rate": 2.2577696526508228e-05, "loss": 0.4494, "step": 66000 }, { "epoch": 5.48, "eval_f1": 0.4400121441304865, "eval_loss": 1.8601397275924683, "eval_runtime": 46.6249, "eval_samples_per_second": 139.024, "eval_steps_per_second": 8.708, "step": 66000 }, { "epoch": 5.53, "learning_rate": 2.2369951803224197e-05, "loss": 0.4364, "step": 66500 }, { "epoch": 5.53, "eval_f1": 0.4454509847031083, "eval_loss": 1.9597169160842896, "eval_runtime": 45.9584, "eval_samples_per_second": 141.041, "eval_steps_per_second": 8.834, "step": 66500 }, { "epoch": 5.57, "learning_rate": 2.216220707994017e-05, "loss": 0.4611, "step": 67000 }, { "epoch": 5.57, "eval_f1": 0.44059704673003397, "eval_loss": 1.899012804031372, "eval_runtime": 47.0101, "eval_samples_per_second": 137.885, "eval_steps_per_second": 8.636, "step": 67000 }, { "epoch": 5.61, "learning_rate": 2.1954462356656142e-05, "loss": 0.4434, "step": 67500 }, { "epoch": 5.61, "eval_f1": 0.44119414948642377, "eval_loss": 1.983310341835022, "eval_runtime": 45.9947, "eval_samples_per_second": 140.929, "eval_steps_per_second": 8.827, "step": 67500 }, { "epoch": 5.65, "learning_rate": 2.174671763337211e-05, "loss": 0.45, "step": 68000 }, { "epoch": 5.65, "eval_f1": 0.4426699728777116, "eval_loss": 1.8643006086349487, "eval_runtime": 46.7241, "eval_samples_per_second": 138.729, "eval_steps_per_second": 8.689, "step": 68000 }, { "epoch": 5.69, "learning_rate": 2.1538972910088084e-05, "loss": 0.4657, "step": 68500 }, { "epoch": 5.69, "eval_f1": 0.4459768786824306, "eval_loss": 1.9347878694534302, "eval_runtime": 46.4171, "eval_samples_per_second": 139.647, "eval_steps_per_second": 8.747, "step": 68500 }, { "epoch": 5.73, "learning_rate": 2.1331228186804057e-05, "loss": 0.4536, "step": 69000 }, { "epoch": 5.73, "eval_f1": 0.43967737232771253, "eval_loss": 1.9800372123718262, "eval_runtime": 46.5899, "eval_samples_per_second": 139.129, "eval_steps_per_second": 8.714, "step": 69000 }, { "epoch": 5.78, "learning_rate": 2.1123483463520026e-05, "loss": 0.4665, "step": 69500 }, { "epoch": 5.78, "eval_f1": 0.43522687025203594, "eval_loss": 1.7668453454971313, "eval_runtime": 45.8529, "eval_samples_per_second": 141.365, "eval_steps_per_second": 8.854, "step": 69500 }, { "epoch": 5.82, "learning_rate": 2.0915738740236e-05, "loss": 0.4668, "step": 70000 }, { "epoch": 5.82, "eval_f1": 0.43357794081082646, "eval_loss": 1.8984841108322144, "eval_runtime": 46.4335, "eval_samples_per_second": 139.597, "eval_steps_per_second": 8.744, "step": 70000 }, { "epoch": 5.86, "learning_rate": 2.070799401695197e-05, "loss": 0.4622, "step": 70500 }, { "epoch": 5.86, "eval_f1": 0.4323966893074926, "eval_loss": 1.8252310752868652, "eval_runtime": 45.8779, "eval_samples_per_second": 141.288, "eval_steps_per_second": 8.85, "step": 70500 }, { "epoch": 5.9, "learning_rate": 2.050024929366794e-05, "loss": 0.4766, "step": 71000 }, { "epoch": 5.9, "eval_f1": 0.44080051637694073, "eval_loss": 1.7060314416885376, "eval_runtime": 46.3123, "eval_samples_per_second": 139.963, "eval_steps_per_second": 8.767, "step": 71000 }, { "epoch": 5.94, "learning_rate": 2.0292504570383913e-05, "loss": 0.4476, "step": 71500 }, { "epoch": 5.94, "eval_f1": 0.4385964321265162, "eval_loss": 1.8184629678726196, "eval_runtime": 45.9872, "eval_samples_per_second": 140.952, "eval_steps_per_second": 8.829, "step": 71500 }, { "epoch": 5.98, "learning_rate": 2.0084759847099886e-05, "loss": 0.4602, "step": 72000 }, { "epoch": 5.98, "eval_f1": 0.44088418061549356, "eval_loss": 1.7630596160888672, "eval_runtime": 46.6035, "eval_samples_per_second": 139.088, "eval_steps_per_second": 8.712, "step": 72000 }, { "epoch": 6.02, "learning_rate": 1.9877015123815855e-05, "loss": 0.3679, "step": 72500 }, { "epoch": 6.02, "eval_f1": 0.4346630256587338, "eval_loss": 2.1265206336975098, "eval_runtime": 46.1031, "eval_samples_per_second": 140.598, "eval_steps_per_second": 8.806, "step": 72500 }, { "epoch": 6.07, "learning_rate": 1.9669270400531827e-05, "loss": 0.3708, "step": 73000 }, { "epoch": 6.07, "eval_f1": 0.4350030580705022, "eval_loss": 2.233613967895508, "eval_runtime": 46.3001, "eval_samples_per_second": 140.0, "eval_steps_per_second": 8.769, "step": 73000 }, { "epoch": 6.11, "learning_rate": 1.94615256772478e-05, "loss": 0.3761, "step": 73500 }, { "epoch": 6.11, "eval_f1": 0.4315408453045143, "eval_loss": 2.161686420440674, "eval_runtime": 46.0645, "eval_samples_per_second": 140.716, "eval_steps_per_second": 8.814, "step": 73500 }, { "epoch": 6.15, "learning_rate": 1.925378095396377e-05, "loss": 0.382, "step": 74000 }, { "epoch": 6.15, "eval_f1": 0.4300930630733001, "eval_loss": 2.2093658447265625, "eval_runtime": 46.5249, "eval_samples_per_second": 139.323, "eval_steps_per_second": 8.727, "step": 74000 }, { "epoch": 6.19, "learning_rate": 1.9046036230679742e-05, "loss": 0.3606, "step": 74500 }, { "epoch": 6.19, "eval_f1": 0.4263466007824531, "eval_loss": 2.422569751739502, "eval_runtime": 46.1496, "eval_samples_per_second": 140.456, "eval_steps_per_second": 8.797, "step": 74500 }, { "epoch": 6.23, "learning_rate": 1.8838291507395715e-05, "loss": 0.3929, "step": 75000 }, { "epoch": 6.23, "eval_f1": 0.42870899531401374, "eval_loss": 2.2339413166046143, "eval_runtime": 45.9626, "eval_samples_per_second": 141.028, "eval_steps_per_second": 8.833, "step": 75000 }, { "epoch": 6.27, "learning_rate": 1.8630546784111684e-05, "loss": 0.3761, "step": 75500 }, { "epoch": 6.27, "eval_f1": 0.43084241603610085, "eval_loss": 2.245156764984131, "eval_runtime": 45.9689, "eval_samples_per_second": 141.008, "eval_steps_per_second": 8.832, "step": 75500 }, { "epoch": 6.32, "learning_rate": 1.8422802060827657e-05, "loss": 0.3735, "step": 76000 }, { "epoch": 6.32, "eval_f1": 0.42730161492497354, "eval_loss": 2.375741720199585, "eval_runtime": 45.5018, "eval_samples_per_second": 142.456, "eval_steps_per_second": 8.923, "step": 76000 }, { "epoch": 6.36, "learning_rate": 1.821505733754363e-05, "loss": 0.3824, "step": 76500 }, { "epoch": 6.36, "eval_f1": 0.42850714365323117, "eval_loss": 2.4190187454223633, "eval_runtime": 46.0335, "eval_samples_per_second": 140.81, "eval_steps_per_second": 8.82, "step": 76500 }, { "epoch": 6.4, "learning_rate": 1.80073126142596e-05, "loss": 0.4198, "step": 77000 }, { "epoch": 6.4, "eval_f1": 0.41957506619020246, "eval_loss": 2.291776657104492, "eval_runtime": 45.9135, "eval_samples_per_second": 141.179, "eval_steps_per_second": 8.843, "step": 77000 }, { "epoch": 6.44, "learning_rate": 1.779956789097557e-05, "loss": 0.3759, "step": 77500 }, { "epoch": 6.44, "eval_f1": 0.4248491736870143, "eval_loss": 2.2740871906280518, "eval_runtime": 46.8387, "eval_samples_per_second": 138.39, "eval_steps_per_second": 8.668, "step": 77500 }, { "epoch": 6.48, "learning_rate": 1.759182316769154e-05, "loss": 0.4006, "step": 78000 }, { "epoch": 6.48, "eval_f1": 0.43213490469184823, "eval_loss": 2.3717033863067627, "eval_runtime": 46.2157, "eval_samples_per_second": 140.255, "eval_steps_per_second": 8.785, "step": 78000 }, { "epoch": 6.52, "learning_rate": 1.7384078444407513e-05, "loss": 0.374, "step": 78500 }, { "epoch": 6.52, "eval_f1": 0.42844878973090866, "eval_loss": 2.2131588459014893, "eval_runtime": 46.4565, "eval_samples_per_second": 139.528, "eval_steps_per_second": 8.739, "step": 78500 }, { "epoch": 6.56, "learning_rate": 1.7176333721123482e-05, "loss": 0.3985, "step": 79000 }, { "epoch": 6.56, "eval_f1": 0.430486194952956, "eval_loss": 2.2677221298217773, "eval_runtime": 45.8499, "eval_samples_per_second": 141.374, "eval_steps_per_second": 8.855, "step": 79000 }, { "epoch": 6.61, "learning_rate": 1.6968588997839455e-05, "loss": 0.3892, "step": 79500 }, { "epoch": 6.61, "eval_f1": 0.4339078659073283, "eval_loss": 2.2518913745880127, "eval_runtime": 46.9488, "eval_samples_per_second": 138.065, "eval_steps_per_second": 8.648, "step": 79500 }, { "epoch": 6.65, "learning_rate": 1.6760844274555427e-05, "loss": 0.4071, "step": 80000 }, { "epoch": 6.65, "eval_f1": 0.4281750460163451, "eval_loss": 2.1826319694519043, "eval_runtime": 46.8418, "eval_samples_per_second": 138.381, "eval_steps_per_second": 8.667, "step": 80000 }, { "epoch": 6.69, "learning_rate": 1.6553099551271397e-05, "loss": 0.4054, "step": 80500 }, { "epoch": 6.69, "eval_f1": 0.425255645027199, "eval_loss": 2.1723647117614746, "eval_runtime": 46.2813, "eval_samples_per_second": 140.057, "eval_steps_per_second": 8.772, "step": 80500 }, { "epoch": 6.73, "learning_rate": 1.634535482798737e-05, "loss": 0.374, "step": 81000 }, { "epoch": 6.73, "eval_f1": 0.4243673281546822, "eval_loss": 2.208036184310913, "eval_runtime": 45.8911, "eval_samples_per_second": 141.248, "eval_steps_per_second": 8.847, "step": 81000 }, { "epoch": 6.77, "learning_rate": 1.6137610104703342e-05, "loss": 0.4086, "step": 81500 }, { "epoch": 6.77, "eval_f1": 0.4345723086799732, "eval_loss": 2.182802438735962, "eval_runtime": 45.7981, "eval_samples_per_second": 141.534, "eval_steps_per_second": 8.865, "step": 81500 }, { "epoch": 6.81, "learning_rate": 1.592986538141931e-05, "loss": 0.3828, "step": 82000 }, { "epoch": 6.81, "eval_f1": 0.43379920447502057, "eval_loss": 2.186859369277954, "eval_runtime": 45.8903, "eval_samples_per_second": 141.25, "eval_steps_per_second": 8.847, "step": 82000 }, { "epoch": 6.86, "learning_rate": 1.5722120658135284e-05, "loss": 0.4065, "step": 82500 }, { "epoch": 6.86, "eval_f1": 0.43151860579932383, "eval_loss": 2.138597249984741, "eval_runtime": 46.0037, "eval_samples_per_second": 140.902, "eval_steps_per_second": 8.825, "step": 82500 }, { "epoch": 6.9, "learning_rate": 1.5514375934851256e-05, "loss": 0.4058, "step": 83000 }, { "epoch": 6.9, "eval_f1": 0.43162515735240276, "eval_loss": 2.0976452827453613, "eval_runtime": 46.3993, "eval_samples_per_second": 139.7, "eval_steps_per_second": 8.75, "step": 83000 }, { "epoch": 6.94, "learning_rate": 1.5306631211567226e-05, "loss": 0.3873, "step": 83500 }, { "epoch": 6.94, "eval_f1": 0.4212864273818023, "eval_loss": 2.4226503372192383, "eval_runtime": 46.1616, "eval_samples_per_second": 140.42, "eval_steps_per_second": 8.795, "step": 83500 }, { "epoch": 6.98, "learning_rate": 1.5098886488283198e-05, "loss": 0.389, "step": 84000 }, { "epoch": 6.98, "eval_f1": 0.431436402933277, "eval_loss": 2.1598784923553467, "eval_runtime": 46.8062, "eval_samples_per_second": 138.486, "eval_steps_per_second": 8.674, "step": 84000 }, { "epoch": 7.02, "learning_rate": 1.489114176499917e-05, "loss": 0.3538, "step": 84500 }, { "epoch": 7.02, "eval_f1": 0.4350556588831767, "eval_loss": 2.304293155670166, "eval_runtime": 46.574, "eval_samples_per_second": 139.176, "eval_steps_per_second": 8.717, "step": 84500 }, { "epoch": 7.06, "learning_rate": 1.4683397041715142e-05, "loss": 0.3132, "step": 85000 }, { "epoch": 7.06, "eval_f1": 0.4266591713234993, "eval_loss": 2.650728702545166, "eval_runtime": 46.4231, "eval_samples_per_second": 139.629, "eval_steps_per_second": 8.746, "step": 85000 }, { "epoch": 7.1, "learning_rate": 1.4475652318431113e-05, "loss": 0.3166, "step": 85500 }, { "epoch": 7.1, "eval_f1": 0.43607320504273067, "eval_loss": 2.6059470176696777, "eval_runtime": 45.9726, "eval_samples_per_second": 140.997, "eval_steps_per_second": 8.831, "step": 85500 }, { "epoch": 7.15, "learning_rate": 1.4267907595147084e-05, "loss": 0.3303, "step": 86000 }, { "epoch": 7.15, "eval_f1": 0.43326161921977124, "eval_loss": 2.56927227973938, "eval_runtime": 46.099, "eval_samples_per_second": 140.611, "eval_steps_per_second": 8.807, "step": 86000 }, { "epoch": 7.19, "learning_rate": 1.4060162871863056e-05, "loss": 0.3031, "step": 86500 }, { "epoch": 7.19, "eval_f1": 0.43263480546826966, "eval_loss": 2.6123645305633545, "eval_runtime": 46.1092, "eval_samples_per_second": 140.579, "eval_steps_per_second": 8.805, "step": 86500 }, { "epoch": 7.23, "learning_rate": 1.3852418148579027e-05, "loss": 0.3495, "step": 87000 }, { "epoch": 7.23, "eval_f1": 0.42935020523616974, "eval_loss": 2.5714728832244873, "eval_runtime": 46.4434, "eval_samples_per_second": 139.568, "eval_steps_per_second": 8.742, "step": 87000 }, { "epoch": 7.27, "learning_rate": 1.3644673425294998e-05, "loss": 0.3364, "step": 87500 }, { "epoch": 7.27, "eval_f1": 0.4343767918185199, "eval_loss": 2.5482189655303955, "eval_runtime": 45.6061, "eval_samples_per_second": 142.13, "eval_steps_per_second": 8.902, "step": 87500 }, { "epoch": 7.31, "learning_rate": 1.3436928702010971e-05, "loss": 0.3169, "step": 88000 }, { "epoch": 7.31, "eval_f1": 0.4326820837375438, "eval_loss": 2.533505916595459, "eval_runtime": 45.7021, "eval_samples_per_second": 141.832, "eval_steps_per_second": 8.884, "step": 88000 }, { "epoch": 7.35, "learning_rate": 1.3229183978726942e-05, "loss": 0.3306, "step": 88500 }, { "epoch": 7.35, "eval_f1": 0.4336259208362092, "eval_loss": 2.505479097366333, "eval_runtime": 45.9282, "eval_samples_per_second": 141.133, "eval_steps_per_second": 8.84, "step": 88500 }, { "epoch": 7.4, "learning_rate": 1.3021439255442913e-05, "loss": 0.3238, "step": 89000 }, { "epoch": 7.4, "eval_f1": 0.4290241175053658, "eval_loss": 2.64029598236084, "eval_runtime": 45.7506, "eval_samples_per_second": 141.681, "eval_steps_per_second": 8.874, "step": 89000 }, { "epoch": 7.44, "learning_rate": 1.2813694532158885e-05, "loss": 0.3488, "step": 89500 }, { "epoch": 7.44, "eval_f1": 0.4318857789484726, "eval_loss": 2.4507477283477783, "eval_runtime": 46.4161, "eval_samples_per_second": 139.65, "eval_steps_per_second": 8.747, "step": 89500 }, { "epoch": 7.48, "learning_rate": 1.2605949808874856e-05, "loss": 0.3423, "step": 90000 }, { "epoch": 7.48, "eval_f1": 0.42784593140742677, "eval_loss": 2.4914281368255615, "eval_runtime": 46.3911, "eval_samples_per_second": 139.725, "eval_steps_per_second": 8.752, "step": 90000 }, { "epoch": 7.52, "learning_rate": 1.2398205085590826e-05, "loss": 0.3356, "step": 90500 }, { "epoch": 7.52, "eval_f1": 0.44002802787763307, "eval_loss": 2.723588466644287, "eval_runtime": 46.564, "eval_samples_per_second": 139.206, "eval_steps_per_second": 8.719, "step": 90500 }, { "epoch": 7.56, "learning_rate": 1.2190460362306798e-05, "loss": 0.3655, "step": 91000 }, { "epoch": 7.56, "eval_f1": 0.4342107835672762, "eval_loss": 2.516355276107788, "eval_runtime": 46.0601, "eval_samples_per_second": 140.729, "eval_steps_per_second": 8.815, "step": 91000 }, { "epoch": 7.6, "learning_rate": 1.198271563902277e-05, "loss": 0.3445, "step": 91500 }, { "epoch": 7.6, "eval_f1": 0.4392682142110146, "eval_loss": 2.4509174823760986, "eval_runtime": 46.5389, "eval_samples_per_second": 139.281, "eval_steps_per_second": 8.724, "step": 91500 }, { "epoch": 7.65, "learning_rate": 1.177497091573874e-05, "loss": 0.3123, "step": 92000 }, { "epoch": 7.65, "eval_f1": 0.4346012419342983, "eval_loss": 2.605870008468628, "eval_runtime": 46.0454, "eval_samples_per_second": 140.774, "eval_steps_per_second": 8.817, "step": 92000 }, { "epoch": 7.69, "learning_rate": 1.1567226192454713e-05, "loss": 0.3286, "step": 92500 }, { "epoch": 7.69, "eval_f1": 0.42900413120162884, "eval_loss": 2.5746617317199707, "eval_runtime": 46.6316, "eval_samples_per_second": 139.005, "eval_steps_per_second": 8.707, "step": 92500 }, { "epoch": 7.73, "learning_rate": 1.1359481469170684e-05, "loss": 0.3308, "step": 93000 }, { "epoch": 7.73, "eval_f1": 0.4348325522174462, "eval_loss": 2.6604156494140625, "eval_runtime": 46.0552, "eval_samples_per_second": 140.744, "eval_steps_per_second": 8.816, "step": 93000 }, { "epoch": 7.77, "learning_rate": 1.1151736745886655e-05, "loss": 0.3507, "step": 93500 }, { "epoch": 7.77, "eval_f1": 0.4362724679273792, "eval_loss": 2.6467623710632324, "eval_runtime": 46.2441, "eval_samples_per_second": 140.169, "eval_steps_per_second": 8.78, "step": 93500 }, { "epoch": 7.81, "learning_rate": 1.0943992022602627e-05, "loss": 0.3392, "step": 94000 }, { "epoch": 7.81, "eval_f1": 0.43161760121865855, "eval_loss": 2.6293702125549316, "eval_runtime": 45.6229, "eval_samples_per_second": 142.078, "eval_steps_per_second": 8.899, "step": 94000 }, { "epoch": 7.85, "learning_rate": 1.0736247299318598e-05, "loss": 0.3885, "step": 94500 }, { "epoch": 7.85, "eval_f1": 0.42849041218645784, "eval_loss": 2.4070699214935303, "eval_runtime": 45.9402, "eval_samples_per_second": 141.096, "eval_steps_per_second": 8.838, "step": 94500 }, { "epoch": 7.89, "learning_rate": 1.052850257603457e-05, "loss": 0.3458, "step": 95000 }, { "epoch": 7.89, "eval_f1": 0.42803465998306145, "eval_loss": 2.5539064407348633, "eval_runtime": 46.1313, "eval_samples_per_second": 140.512, "eval_steps_per_second": 8.801, "step": 95000 }, { "epoch": 7.94, "learning_rate": 1.032075785275054e-05, "loss": 0.3451, "step": 95500 }, { "epoch": 7.94, "eval_f1": 0.4282701100981614, "eval_loss": 2.4033892154693604, "eval_runtime": 46.4312, "eval_samples_per_second": 139.604, "eval_steps_per_second": 8.744, "step": 95500 }, { "epoch": 7.98, "learning_rate": 1.0113013129466511e-05, "loss": 0.3394, "step": 96000 }, { "epoch": 7.98, "eval_f1": 0.43486076258951, "eval_loss": 2.6116716861724854, "eval_runtime": 45.9663, "eval_samples_per_second": 141.016, "eval_steps_per_second": 8.833, "step": 96000 }, { "epoch": 8.02, "learning_rate": 9.905268406182484e-06, "loss": 0.3275, "step": 96500 }, { "epoch": 8.02, "eval_f1": 0.4322422606334531, "eval_loss": 2.5991387367248535, "eval_runtime": 46.5777, "eval_samples_per_second": 139.165, "eval_steps_per_second": 8.717, "step": 96500 }, { "epoch": 8.06, "learning_rate": 9.697523682898455e-06, "loss": 0.2676, "step": 97000 }, { "epoch": 8.06, "eval_f1": 0.42999721877624847, "eval_loss": 2.772134304046631, "eval_runtime": 46.0246, "eval_samples_per_second": 140.838, "eval_steps_per_second": 8.821, "step": 97000 }, { "epoch": 8.1, "learning_rate": 9.489778959614426e-06, "loss": 0.3013, "step": 97500 }, { "epoch": 8.1, "eval_f1": 0.43159307099859695, "eval_loss": 2.7766764163970947, "eval_runtime": 46.3012, "eval_samples_per_second": 139.996, "eval_steps_per_second": 8.769, "step": 97500 }, { "epoch": 8.14, "learning_rate": 9.282034236330398e-06, "loss": 0.283, "step": 98000 }, { "epoch": 8.14, "eval_f1": 0.43429947059083457, "eval_loss": 2.723806142807007, "eval_runtime": 46.2805, "eval_samples_per_second": 140.059, "eval_steps_per_second": 8.773, "step": 98000 }, { "epoch": 8.19, "learning_rate": 9.07428951304637e-06, "loss": 0.2924, "step": 98500 }, { "epoch": 8.19, "eval_f1": 0.43274102934044933, "eval_loss": 2.7132375240325928, "eval_runtime": 46.1859, "eval_samples_per_second": 140.346, "eval_steps_per_second": 8.791, "step": 98500 }, { "epoch": 8.23, "learning_rate": 8.86654478976234e-06, "loss": 0.2817, "step": 99000 }, { "epoch": 8.23, "eval_f1": 0.427616805108728, "eval_loss": 2.7119312286376953, "eval_runtime": 56.0523, "eval_samples_per_second": 115.642, "eval_steps_per_second": 7.243, "step": 99000 }, { "epoch": 8.27, "learning_rate": 8.658800066478313e-06, "loss": 0.2586, "step": 99500 }, { "epoch": 8.27, "eval_f1": 0.4289585910640077, "eval_loss": 2.887305736541748, "eval_runtime": 46.8426, "eval_samples_per_second": 138.378, "eval_steps_per_second": 8.667, "step": 99500 }, { "epoch": 8.31, "learning_rate": 8.451055343194284e-06, "loss": 0.3051, "step": 100000 }, { "epoch": 8.31, "eval_f1": 0.42582497432500993, "eval_loss": 2.7572007179260254, "eval_runtime": 45.6991, "eval_samples_per_second": 141.841, "eval_steps_per_second": 8.884, "step": 100000 }, { "epoch": 8.35, "learning_rate": 8.243310619910255e-06, "loss": 0.2916, "step": 100500 }, { "epoch": 8.35, "eval_f1": 0.43075118593793377, "eval_loss": 2.814906358718872, "eval_runtime": 46.2133, "eval_samples_per_second": 140.263, "eval_steps_per_second": 8.785, "step": 100500 }, { "epoch": 8.39, "learning_rate": 8.035565896626227e-06, "loss": 0.2948, "step": 101000 }, { "epoch": 8.39, "eval_f1": 0.4244624620028211, "eval_loss": 2.6767539978027344, "eval_runtime": 46.5451, "eval_samples_per_second": 139.263, "eval_steps_per_second": 8.723, "step": 101000 }, { "epoch": 8.43, "learning_rate": 7.827821173342198e-06, "loss": 0.277, "step": 101500 }, { "epoch": 8.43, "eval_f1": 0.4273818288568198, "eval_loss": 2.727567672729492, "eval_runtime": 46.8521, "eval_samples_per_second": 138.35, "eval_steps_per_second": 8.666, "step": 101500 }, { "epoch": 8.48, "learning_rate": 7.620076450058169e-06, "loss": 0.2929, "step": 102000 }, { "epoch": 8.48, "eval_f1": 0.4320719535965778, "eval_loss": 2.926513433456421, "eval_runtime": 47.1016, "eval_samples_per_second": 137.617, "eval_steps_per_second": 8.62, "step": 102000 }, { "epoch": 8.52, "learning_rate": 7.412331726774139e-06, "loss": 0.2893, "step": 102500 }, { "epoch": 8.52, "eval_f1": 0.4260519729309167, "eval_loss": 2.877741575241089, "eval_runtime": 45.6581, "eval_samples_per_second": 141.968, "eval_steps_per_second": 8.892, "step": 102500 }, { "epoch": 8.56, "learning_rate": 7.204587003490111e-06, "loss": 0.2808, "step": 103000 }, { "epoch": 8.56, "eval_f1": 0.4282218551070931, "eval_loss": 2.7379603385925293, "eval_runtime": 45.9855, "eval_samples_per_second": 140.957, "eval_steps_per_second": 8.829, "step": 103000 }, { "epoch": 8.6, "learning_rate": 6.996842280206083e-06, "loss": 0.2728, "step": 103500 }, { "epoch": 8.6, "eval_f1": 0.43170842579550445, "eval_loss": 2.7965376377105713, "eval_runtime": 45.7627, "eval_samples_per_second": 141.644, "eval_steps_per_second": 8.872, "step": 103500 }, { "epoch": 8.64, "learning_rate": 6.789097556922054e-06, "loss": 0.2789, "step": 104000 }, { "epoch": 8.64, "eval_f1": 0.4315708387912964, "eval_loss": 2.875927209854126, "eval_runtime": 45.9208, "eval_samples_per_second": 141.156, "eval_steps_per_second": 8.841, "step": 104000 }, { "epoch": 8.68, "learning_rate": 6.5813528336380256e-06, "loss": 0.3072, "step": 104500 }, { "epoch": 8.68, "eval_f1": 0.42772350225777217, "eval_loss": 2.8333616256713867, "eval_runtime": 49.0657, "eval_samples_per_second": 132.109, "eval_steps_per_second": 8.275, "step": 104500 }, { "epoch": 8.73, "learning_rate": 6.373608110353997e-06, "loss": 0.2779, "step": 105000 }, { "epoch": 8.73, "eval_f1": 0.4306437356669429, "eval_loss": 2.8720405101776123, "eval_runtime": 46.0481, "eval_samples_per_second": 140.766, "eval_steps_per_second": 8.817, "step": 105000 }, { "epoch": 8.77, "learning_rate": 6.165863387069968e-06, "loss": 0.2948, "step": 105500 }, { "epoch": 8.77, "eval_f1": 0.42931126691296156, "eval_loss": 2.82370662689209, "eval_runtime": 45.6262, "eval_samples_per_second": 142.067, "eval_steps_per_second": 8.898, "step": 105500 }, { "epoch": 8.81, "learning_rate": 5.95811866378594e-06, "loss": 0.2917, "step": 106000 }, { "epoch": 8.81, "eval_f1": 0.42987557908855206, "eval_loss": 2.785443067550659, "eval_runtime": 45.7985, "eval_samples_per_second": 141.533, "eval_steps_per_second": 8.865, "step": 106000 }, { "epoch": 8.85, "learning_rate": 5.750373940501912e-06, "loss": 0.2604, "step": 106500 }, { "epoch": 8.85, "eval_f1": 0.4272210968347812, "eval_loss": 2.9237265586853027, "eval_runtime": 45.9348, "eval_samples_per_second": 141.113, "eval_steps_per_second": 8.839, "step": 106500 }, { "epoch": 8.89, "learning_rate": 5.542629217217883e-06, "loss": 0.3057, "step": 107000 }, { "epoch": 8.89, "eval_f1": 0.4287531329908217, "eval_loss": 2.8509225845336914, "eval_runtime": 46.4174, "eval_samples_per_second": 139.646, "eval_steps_per_second": 8.747, "step": 107000 }, { "epoch": 8.93, "learning_rate": 5.334884493933855e-06, "loss": 0.2853, "step": 107500 }, { "epoch": 8.93, "eval_f1": 0.4273342046666312, "eval_loss": 2.7482059001922607, "eval_runtime": 45.8656, "eval_samples_per_second": 141.326, "eval_steps_per_second": 8.852, "step": 107500 }, { "epoch": 8.97, "learning_rate": 5.1271397706498255e-06, "loss": 0.2946, "step": 108000 }, { "epoch": 8.97, "eval_f1": 0.4272429839187248, "eval_loss": 2.86079740524292, "eval_runtime": 46.3563, "eval_samples_per_second": 139.83, "eval_steps_per_second": 8.758, "step": 108000 }, { "epoch": 9.02, "learning_rate": 4.919395047365797e-06, "loss": 0.2492, "step": 108500 }, { "epoch": 9.02, "eval_f1": 0.4263680579842533, "eval_loss": 3.0018138885498047, "eval_runtime": 46.2406, "eval_samples_per_second": 140.18, "eval_steps_per_second": 8.78, "step": 108500 }, { "epoch": 9.06, "learning_rate": 4.711650324081768e-06, "loss": 0.2471, "step": 109000 }, { "epoch": 9.06, "eval_f1": 0.42881222429550997, "eval_loss": 3.014409303665161, "eval_runtime": 46.1142, "eval_samples_per_second": 140.564, "eval_steps_per_second": 8.804, "step": 109000 }, { "epoch": 9.1, "learning_rate": 4.50390560079774e-06, "loss": 0.2497, "step": 109500 }, { "epoch": 9.1, "eval_f1": 0.4299801659722133, "eval_loss": 3.0282411575317383, "eval_runtime": 46.4599, "eval_samples_per_second": 139.518, "eval_steps_per_second": 8.739, "step": 109500 }, { "epoch": 9.14, "learning_rate": 4.296160877513712e-06, "loss": 0.2632, "step": 110000 }, { "epoch": 9.14, "eval_f1": 0.4285290598627509, "eval_loss": 2.9987945556640625, "eval_runtime": 46.2719, "eval_samples_per_second": 140.085, "eval_steps_per_second": 8.774, "step": 110000 }, { "epoch": 9.18, "learning_rate": 4.088416154229683e-06, "loss": 0.2292, "step": 110500 }, { "epoch": 9.18, "eval_f1": 0.4295693743332262, "eval_loss": 2.9812545776367188, "eval_runtime": 46.0798, "eval_samples_per_second": 140.669, "eval_steps_per_second": 8.811, "step": 110500 }, { "epoch": 9.22, "learning_rate": 3.8806714309456546e-06, "loss": 0.2467, "step": 111000 }, { "epoch": 9.22, "eval_f1": 0.4292138712207353, "eval_loss": 2.9471848011016846, "eval_runtime": 46.1858, "eval_samples_per_second": 140.346, "eval_steps_per_second": 8.791, "step": 111000 }, { "epoch": 9.27, "learning_rate": 3.672926707661625e-06, "loss": 0.2405, "step": 111500 }, { "epoch": 9.27, "eval_f1": 0.4283879568475488, "eval_loss": 3.013239860534668, "eval_runtime": 46.341, "eval_samples_per_second": 139.876, "eval_steps_per_second": 8.761, "step": 111500 }, { "epoch": 9.31, "learning_rate": 3.465181984377597e-06, "loss": 0.2479, "step": 112000 }, { "epoch": 9.31, "eval_f1": 0.43018803192623334, "eval_loss": 2.974679946899414, "eval_runtime": 46.0989, "eval_samples_per_second": 140.611, "eval_steps_per_second": 8.807, "step": 112000 }, { "epoch": 9.35, "learning_rate": 3.2574372610935682e-06, "loss": 0.2415, "step": 112500 }, { "epoch": 9.35, "eval_f1": 0.43026318885711246, "eval_loss": 2.9601778984069824, "eval_runtime": 46.5726, "eval_samples_per_second": 139.18, "eval_steps_per_second": 8.718, "step": 112500 }, { "epoch": 9.39, "learning_rate": 3.0496925378095396e-06, "loss": 0.2387, "step": 113000 }, { "epoch": 9.39, "eval_f1": 0.4279864446994403, "eval_loss": 2.995856285095215, "eval_runtime": 45.9971, "eval_samples_per_second": 140.922, "eval_steps_per_second": 8.827, "step": 113000 }, { "epoch": 9.43, "learning_rate": 2.8419478145255114e-06, "loss": 0.2005, "step": 113500 }, { "epoch": 9.43, "eval_f1": 0.4226136111773091, "eval_loss": 3.061025857925415, "eval_runtime": 46.3377, "eval_samples_per_second": 139.886, "eval_steps_per_second": 8.762, "step": 113500 }, { "epoch": 9.47, "learning_rate": 2.6342030912414823e-06, "loss": 0.2479, "step": 114000 }, { "epoch": 9.47, "eval_f1": 0.4310947389401135, "eval_loss": 3.0478320121765137, "eval_runtime": 46.1598, "eval_samples_per_second": 140.425, "eval_steps_per_second": 8.796, "step": 114000 }, { "epoch": 9.51, "learning_rate": 2.426458367957454e-06, "loss": 0.2649, "step": 114500 }, { "epoch": 9.51, "eval_f1": 0.42937936025434953, "eval_loss": 2.964843511581421, "eval_runtime": 45.9242, "eval_samples_per_second": 141.146, "eval_steps_per_second": 8.841, "step": 114500 }, { "epoch": 9.56, "learning_rate": 2.2187136446734255e-06, "loss": 0.2659, "step": 115000 }, { "epoch": 9.56, "eval_f1": 0.42954375020993524, "eval_loss": 2.9387714862823486, "eval_runtime": 46.5353, "eval_samples_per_second": 139.292, "eval_steps_per_second": 8.725, "step": 115000 }, { "epoch": 9.6, "learning_rate": 2.010968921389397e-06, "loss": 0.2723, "step": 115500 }, { "epoch": 9.6, "eval_f1": 0.43453017211998707, "eval_loss": 2.942929744720459, "eval_runtime": 45.4198, "eval_samples_per_second": 142.713, "eval_steps_per_second": 8.939, "step": 115500 }, { "epoch": 9.64, "learning_rate": 1.8032241981053682e-06, "loss": 0.2607, "step": 116000 }, { "epoch": 9.64, "eval_f1": 0.43214152175935766, "eval_loss": 2.9575393199920654, "eval_runtime": 46.6531, "eval_samples_per_second": 138.94, "eval_steps_per_second": 8.703, "step": 116000 }, { "epoch": 9.68, "learning_rate": 1.5954794748213396e-06, "loss": 0.2284, "step": 116500 }, { "epoch": 9.68, "eval_f1": 0.43057572034563696, "eval_loss": 2.9958791732788086, "eval_runtime": 45.9052, "eval_samples_per_second": 141.204, "eval_steps_per_second": 8.844, "step": 116500 }, { "epoch": 9.72, "learning_rate": 1.387734751537311e-06, "loss": 0.2703, "step": 117000 }, { "epoch": 9.72, "eval_f1": 0.4305604104069932, "eval_loss": 2.9565792083740234, "eval_runtime": 46.6048, "eval_samples_per_second": 139.084, "eval_steps_per_second": 8.712, "step": 117000 }, { "epoch": 9.76, "learning_rate": 1.1799900282532825e-06, "loss": 0.2358, "step": 117500 }, { "epoch": 9.76, "eval_f1": 0.4299059329453134, "eval_loss": 2.9763364791870117, "eval_runtime": 45.9929, "eval_samples_per_second": 140.935, "eval_steps_per_second": 8.827, "step": 117500 }, { "epoch": 9.81, "learning_rate": 9.722453049692539e-07, "loss": 0.2633, "step": 118000 }, { "epoch": 9.81, "eval_f1": 0.42872674431828584, "eval_loss": 2.9550485610961914, "eval_runtime": 46.191, "eval_samples_per_second": 140.33, "eval_steps_per_second": 8.79, "step": 118000 }, { "epoch": 9.85, "learning_rate": 7.645005816852252e-07, "loss": 0.2184, "step": 118500 }, { "epoch": 9.85, "eval_f1": 0.43016074486231115, "eval_loss": 2.9980921745300293, "eval_runtime": 46.1749, "eval_samples_per_second": 140.379, "eval_steps_per_second": 8.793, "step": 118500 }, { "epoch": 9.89, "learning_rate": 5.567558584011966e-07, "loss": 0.2299, "step": 119000 }, { "epoch": 9.89, "eval_f1": 0.4306080177005011, "eval_loss": 3.0055530071258545, "eval_runtime": 46.0833, "eval_samples_per_second": 140.658, "eval_steps_per_second": 8.81, "step": 119000 }, { "epoch": 9.93, "learning_rate": 3.4901113511716807e-07, "loss": 0.2469, "step": 119500 }, { "epoch": 9.93, "eval_f1": 0.4310614992045377, "eval_loss": 2.9983813762664795, "eval_runtime": 46.3038, "eval_samples_per_second": 139.989, "eval_steps_per_second": 8.768, "step": 119500 }, { "epoch": 9.97, "learning_rate": 1.4126641183313946e-07, "loss": 0.2741, "step": 120000 }, { "epoch": 9.97, "eval_f1": 0.43169668013947665, "eval_loss": 2.9964375495910645, "eval_runtime": 46.5054, "eval_samples_per_second": 139.382, "eval_steps_per_second": 8.73, "step": 120000 }, { "epoch": 10.0, "step": 120340, "total_flos": 5.252278015131096e+16, "train_loss": 0.5760954068351747, "train_runtime": 42398.6115, "train_samples_per_second": 22.706, "train_steps_per_second": 2.838 } ], "logging_steps": 500, "max_steps": 120340, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 5.252278015131096e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }