|
{ |
|
"best_metric": 0.9102957175353575, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-masakhaner-conll_2003_en/checkpoint-15000", |
|
"epoch": 34.16856492027335, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy_score": 0.905671048012793, |
|
"eval_f1": 0.4031678791785615, |
|
"eval_loss": 0.3176519572734833, |
|
"eval_precision": 0.37412408135361475, |
|
"eval_recall": 0.4371006389776358, |
|
"eval_runtime": 25.4824, |
|
"eval_samples_per_second": 127.539, |
|
"eval_steps_per_second": 15.972, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy_score": 0.9367760053044191, |
|
"eval_f1": 0.600400266844563, |
|
"eval_loss": 0.20133168995380402, |
|
"eval_precision": 0.5742935278030994, |
|
"eval_recall": 0.6289936102236422, |
|
"eval_runtime": 25.4664, |
|
"eval_samples_per_second": 127.619, |
|
"eval_steps_per_second": 15.982, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.3604, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy_score": 0.9617379773002067, |
|
"eval_f1": 0.766023166023166, |
|
"eval_loss": 0.1343105286359787, |
|
"eval_precision": 0.7414050822122571, |
|
"eval_recall": 0.792332268370607, |
|
"eval_runtime": 25.4735, |
|
"eval_samples_per_second": 127.584, |
|
"eval_steps_per_second": 15.977, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_accuracy_score": 0.9683294980303444, |
|
"eval_f1": 0.7997698504027618, |
|
"eval_loss": 0.11298365145921707, |
|
"eval_precision": 0.7693726937269373, |
|
"eval_recall": 0.832667731629393, |
|
"eval_runtime": 25.4403, |
|
"eval_samples_per_second": 127.75, |
|
"eval_steps_per_second": 15.998, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.1118, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy_score": 0.9734389016732322, |
|
"eval_f1": 0.8381742738589212, |
|
"eval_loss": 0.09904822707176208, |
|
"eval_precision": 0.8110177404295051, |
|
"eval_recall": 0.8672124600638977, |
|
"eval_runtime": 25.4063, |
|
"eval_samples_per_second": 127.921, |
|
"eval_steps_per_second": 16.02, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy_score": 0.9739654432700183, |
|
"eval_f1": 0.8414692624916082, |
|
"eval_loss": 0.09652349352836609, |
|
"eval_precision": 0.8095589592175678, |
|
"eval_recall": 0.8759984025559105, |
|
"eval_runtime": 25.4049, |
|
"eval_samples_per_second": 127.928, |
|
"eval_steps_per_second": 16.021, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_accuracy_score": 0.9777682436912516, |
|
"eval_f1": 0.8652216748768473, |
|
"eval_loss": 0.09149660170078278, |
|
"eval_precision": 0.853947880202256, |
|
"eval_recall": 0.876797124600639, |
|
"eval_runtime": 25.4531, |
|
"eval_samples_per_second": 127.686, |
|
"eval_steps_per_second": 15.99, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0611, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_accuracy_score": 0.9783922929911463, |
|
"eval_f1": 0.8685501275259958, |
|
"eval_loss": 0.08810298144817352, |
|
"eval_precision": 0.8536444273042808, |
|
"eval_recall": 0.8839856230031949, |
|
"eval_runtime": 25.471, |
|
"eval_samples_per_second": 127.596, |
|
"eval_steps_per_second": 15.979, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy_score": 0.9792113576972581, |
|
"eval_f1": 0.8704247406537483, |
|
"eval_loss": 0.08606501668691635, |
|
"eval_precision": 0.8535508637236084, |
|
"eval_recall": 0.887979233226837, |
|
"eval_runtime": 25.4698, |
|
"eval_samples_per_second": 127.602, |
|
"eval_steps_per_second": 15.98, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0382, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_accuracy_score": 0.9797574008346659, |
|
"eval_f1": 0.875222816399287, |
|
"eval_loss": 0.08422768861055374, |
|
"eval_precision": 0.8681728880157171, |
|
"eval_recall": 0.882388178913738, |
|
"eval_runtime": 25.4807, |
|
"eval_samples_per_second": 127.547, |
|
"eval_steps_per_second": 15.973, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_accuracy_score": 0.9801084285658567, |
|
"eval_f1": 0.8783047826517477, |
|
"eval_loss": 0.08461492508649826, |
|
"eval_precision": 0.8711451581221764, |
|
"eval_recall": 0.8855830670926518, |
|
"eval_runtime": 25.4915, |
|
"eval_samples_per_second": 127.494, |
|
"eval_steps_per_second": 15.966, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_accuracy_score": 0.9802839424314521, |
|
"eval_f1": 0.8772208436724566, |
|
"eval_loss": 0.0915483608841896, |
|
"eval_precision": 0.872113676731794, |
|
"eval_recall": 0.882388178913738, |
|
"eval_runtime": 25.5288, |
|
"eval_samples_per_second": 127.307, |
|
"eval_steps_per_second": 15.943, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0269, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"eval_accuracy_score": 0.9790358438316628, |
|
"eval_f1": 0.8738092899931258, |
|
"eval_loss": 0.08523886650800705, |
|
"eval_precision": 0.8597101449275363, |
|
"eval_recall": 0.8883785942492013, |
|
"eval_runtime": 25.4488, |
|
"eval_samples_per_second": 127.707, |
|
"eval_steps_per_second": 15.993, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_accuracy_score": 0.980556964000156, |
|
"eval_f1": 0.8805166846071044, |
|
"eval_loss": 0.0843101516366005, |
|
"eval_precision": 0.8633659566302053, |
|
"eval_recall": 0.8983626198083067, |
|
"eval_runtime": 25.4351, |
|
"eval_samples_per_second": 127.776, |
|
"eval_steps_per_second": 16.002, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0195, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"eval_accuracy_score": 0.9802449393502086, |
|
"eval_f1": 0.8802047445614726, |
|
"eval_loss": 0.09137295186519623, |
|
"eval_precision": 0.8679867986798679, |
|
"eval_recall": 0.8927715654952076, |
|
"eval_runtime": 25.4199, |
|
"eval_samples_per_second": 127.853, |
|
"eval_steps_per_second": 16.011, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"eval_accuracy_score": 0.9824096103592184, |
|
"eval_f1": 0.8886048569462195, |
|
"eval_loss": 0.08367849141359329, |
|
"eval_precision": 0.8752663180321518, |
|
"eval_recall": 0.9023562300319489, |
|
"eval_runtime": 25.6211, |
|
"eval_samples_per_second": 126.849, |
|
"eval_steps_per_second": 15.885, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"eval_accuracy_score": 0.9814540348687546, |
|
"eval_f1": 0.886024025783768, |
|
"eval_loss": 0.08719700574874878, |
|
"eval_precision": 0.8671382144905372, |
|
"eval_recall": 0.9057507987220448, |
|
"eval_runtime": 25.3936, |
|
"eval_samples_per_second": 127.985, |
|
"eval_steps_per_second": 16.028, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0136, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_accuracy_score": 0.9823316041967315, |
|
"eval_f1": 0.8920094936708859, |
|
"eval_loss": 0.0911451056599617, |
|
"eval_precision": 0.8836206896551724, |
|
"eval_recall": 0.9005591054313099, |
|
"eval_runtime": 25.4627, |
|
"eval_samples_per_second": 127.638, |
|
"eval_steps_per_second": 15.984, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_accuracy_score": 0.9830921642809782, |
|
"eval_f1": 0.8962131837307153, |
|
"eval_loss": 0.08473534882068634, |
|
"eval_precision": 0.8992762364294331, |
|
"eval_recall": 0.8931709265175719, |
|
"eval_runtime": 25.4742, |
|
"eval_samples_per_second": 127.58, |
|
"eval_steps_per_second": 15.977, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.011, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"eval_accuracy_score": 0.9824096103592184, |
|
"eval_f1": 0.8943864878291108, |
|
"eval_loss": 0.08603190630674362, |
|
"eval_precision": 0.8900533913387384, |
|
"eval_recall": 0.8987619808306709, |
|
"eval_runtime": 25.451, |
|
"eval_samples_per_second": 127.696, |
|
"eval_steps_per_second": 15.991, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_accuracy_score": 0.9822535980342447, |
|
"eval_f1": 0.8913835548990644, |
|
"eval_loss": 0.09200570732355118, |
|
"eval_precision": 0.8793471925393433, |
|
"eval_recall": 0.9037539936102237, |
|
"eval_runtime": 25.5623, |
|
"eval_samples_per_second": 127.141, |
|
"eval_steps_per_second": 15.922, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"eval_accuracy_score": 0.9827801396310308, |
|
"eval_f1": 0.8969183814118342, |
|
"eval_loss": 0.08794757723808289, |
|
"eval_precision": 0.8846377937463585, |
|
"eval_recall": 0.9095447284345048, |
|
"eval_runtime": 25.5902, |
|
"eval_samples_per_second": 127.002, |
|
"eval_steps_per_second": 15.905, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.0085, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"eval_accuracy_score": 0.9820000780061625, |
|
"eval_f1": 0.890210067380103, |
|
"eval_loss": 0.09557678550481796, |
|
"eval_precision": 0.8835562549173879, |
|
"eval_recall": 0.896964856230032, |
|
"eval_runtime": 25.5311, |
|
"eval_samples_per_second": 127.296, |
|
"eval_steps_per_second": 15.941, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"eval_accuracy_score": 0.9825266196029486, |
|
"eval_f1": 0.8952569169960475, |
|
"eval_loss": 0.0915537029504776, |
|
"eval_precision": 0.886150234741784, |
|
"eval_recall": 0.9045527156549521, |
|
"eval_runtime": 25.5326, |
|
"eval_samples_per_second": 127.288, |
|
"eval_steps_per_second": 15.94, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0077, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"eval_accuracy_score": 0.9818635672218106, |
|
"eval_f1": 0.8888670917116234, |
|
"eval_loss": 0.09687037765979767, |
|
"eval_precision": 0.8735299787931367, |
|
"eval_recall": 0.9047523961661342, |
|
"eval_runtime": 25.5023, |
|
"eval_samples_per_second": 127.44, |
|
"eval_steps_per_second": 15.959, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"eval_accuracy_score": 0.9833846873903038, |
|
"eval_f1": 0.9005432098765432, |
|
"eval_loss": 0.08850205689668655, |
|
"eval_precision": 0.8909517295290209, |
|
"eval_recall": 0.9103434504792333, |
|
"eval_runtime": 25.8091, |
|
"eval_samples_per_second": 125.925, |
|
"eval_steps_per_second": 15.77, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"eval_accuracy_score": 0.9832676781465736, |
|
"eval_f1": 0.8994011976047904, |
|
"eval_loss": 0.09829097986221313, |
|
"eval_precision": 0.8990422984836393, |
|
"eval_recall": 0.8997603833865815, |
|
"eval_runtime": 25.5033, |
|
"eval_samples_per_second": 127.434, |
|
"eval_steps_per_second": 15.959, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 3.1879194630872485e-05, |
|
"loss": 0.0064, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"eval_accuracy_score": 0.9810054994344554, |
|
"eval_f1": 0.8866424575522622, |
|
"eval_loss": 0.1039794534444809, |
|
"eval_precision": 0.8718394132406871, |
|
"eval_recall": 0.9019568690095847, |
|
"eval_runtime": 25.823, |
|
"eval_samples_per_second": 125.857, |
|
"eval_steps_per_second": 15.761, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_accuracy_score": 0.9827606380904091, |
|
"eval_f1": 0.8954599761051374, |
|
"eval_loss": 0.09817508608102798, |
|
"eval_precision": 0.8929706115965051, |
|
"eval_recall": 0.8979632587859425, |
|
"eval_runtime": 25.5457, |
|
"eval_samples_per_second": 127.223, |
|
"eval_steps_per_second": 15.932, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 3.02013422818792e-05, |
|
"loss": 0.0053, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"eval_accuracy_score": 0.9824291118998401, |
|
"eval_f1": 0.8904041901373655, |
|
"eval_loss": 0.10226656496524811, |
|
"eval_precision": 0.8814322050479358, |
|
"eval_recall": 0.8995607028753994, |
|
"eval_runtime": 25.5455, |
|
"eval_samples_per_second": 127.224, |
|
"eval_steps_per_second": 15.932, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"eval_accuracy_score": 0.9825461211435703, |
|
"eval_f1": 0.8980195093112622, |
|
"eval_loss": 0.10106658190488815, |
|
"eval_precision": 0.8864034234584711, |
|
"eval_recall": 0.909944089456869, |
|
"eval_runtime": 25.4581, |
|
"eval_samples_per_second": 127.661, |
|
"eval_steps_per_second": 15.987, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"eval_accuracy_score": 0.9829361519560045, |
|
"eval_f1": 0.8955665024630541, |
|
"eval_loss": 0.10531075298786163, |
|
"eval_precision": 0.8838973162193698, |
|
"eval_recall": 0.9075479233226837, |
|
"eval_runtime": 25.4921, |
|
"eval_samples_per_second": 127.491, |
|
"eval_steps_per_second": 15.966, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 2.8523489932885905e-05, |
|
"loss": 0.0047, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"eval_accuracy_score": 0.9832091735247085, |
|
"eval_f1": 0.9004636480220973, |
|
"eval_loss": 0.0937289446592331, |
|
"eval_precision": 0.8898420744784559, |
|
"eval_recall": 0.9113418530351438, |
|
"eval_runtime": 25.4989, |
|
"eval_samples_per_second": 127.456, |
|
"eval_steps_per_second": 15.961, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"eval_accuracy_score": 0.9830141581184914, |
|
"eval_f1": 0.8983334976826743, |
|
"eval_loss": 0.10006673634052277, |
|
"eval_precision": 0.8873952854081434, |
|
"eval_recall": 0.9095447284345048, |
|
"eval_runtime": 25.5372, |
|
"eval_samples_per_second": 127.265, |
|
"eval_steps_per_second": 15.938, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 2.6845637583892618e-05, |
|
"loss": 0.0037, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"eval_accuracy_score": 0.9821170872498928, |
|
"eval_f1": 0.8939078751857356, |
|
"eval_loss": 0.10096853971481323, |
|
"eval_precision": 0.886966778061726, |
|
"eval_recall": 0.9009584664536742, |
|
"eval_runtime": 25.5089, |
|
"eval_samples_per_second": 127.406, |
|
"eval_steps_per_second": 15.955, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"eval_accuracy_score": 0.9839112289870899, |
|
"eval_f1": 0.9025406907502977, |
|
"eval_loss": 0.09699645638465881, |
|
"eval_precision": 0.8971981057616417, |
|
"eval_recall": 0.9079472843450479, |
|
"eval_runtime": 25.3546, |
|
"eval_samples_per_second": 128.182, |
|
"eval_steps_per_second": 16.052, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"eval_accuracy_score": 0.9833846873903038, |
|
"eval_f1": 0.9011811023622048, |
|
"eval_loss": 0.10188570618629456, |
|
"eval_precision": 0.8885869565217391, |
|
"eval_recall": 0.9141373801916933, |
|
"eval_runtime": 25.3306, |
|
"eval_samples_per_second": 128.303, |
|
"eval_steps_per_second": 16.068, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 17.08, |
|
"learning_rate": 2.516778523489933e-05, |
|
"loss": 0.003, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"eval_accuracy_score": 0.9829556534966263, |
|
"eval_f1": 0.9035664474995019, |
|
"eval_loss": 0.11874072253704071, |
|
"eval_precision": 0.9015904572564613, |
|
"eval_recall": 0.9055511182108626, |
|
"eval_runtime": 25.391, |
|
"eval_samples_per_second": 127.998, |
|
"eval_steps_per_second": 16.029, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"eval_accuracy_score": 0.9826241273060572, |
|
"eval_f1": 0.8982106189498388, |
|
"eval_loss": 0.10930322110652924, |
|
"eval_precision": 0.880053650124545, |
|
"eval_recall": 0.917132587859425, |
|
"eval_runtime": 25.3359, |
|
"eval_samples_per_second": 128.277, |
|
"eval_steps_per_second": 16.064, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 2.348993288590604e-05, |
|
"loss": 0.0023, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"eval_accuracy_score": 0.9830141581184914, |
|
"eval_f1": 0.8999901273570936, |
|
"eval_loss": 0.10627970844507217, |
|
"eval_precision": 0.8900605350517478, |
|
"eval_recall": 0.9101437699680511, |
|
"eval_runtime": 25.3382, |
|
"eval_samples_per_second": 128.265, |
|
"eval_steps_per_second": 16.063, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 18.68, |
|
"eval_accuracy_score": 0.983774718202738, |
|
"eval_f1": 0.9028480698620622, |
|
"eval_loss": 0.09893312305212021, |
|
"eval_precision": 0.8974156638390215, |
|
"eval_recall": 0.9083466453674122, |
|
"eval_runtime": 25.3736, |
|
"eval_samples_per_second": 128.086, |
|
"eval_steps_per_second": 16.04, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_accuracy_score": 0.9830921642809782, |
|
"eval_f1": 0.9000591366055589, |
|
"eval_loss": 0.11214980483055115, |
|
"eval_precision": 0.8886726352666408, |
|
"eval_recall": 0.911741214057508, |
|
"eval_runtime": 25.4824, |
|
"eval_samples_per_second": 127.539, |
|
"eval_steps_per_second": 15.972, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 2.181208053691275e-05, |
|
"loss": 0.0022, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"eval_accuracy_score": 0.9835406997152775, |
|
"eval_f1": 0.8996444093243776, |
|
"eval_loss": 0.10627951472997665, |
|
"eval_precision": 0.8901485535574668, |
|
"eval_recall": 0.9093450479233227, |
|
"eval_runtime": 25.3697, |
|
"eval_samples_per_second": 128.105, |
|
"eval_steps_per_second": 16.043, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 20.05, |
|
"eval_accuracy_score": 0.9832676781465736, |
|
"eval_f1": 0.9012602957229334, |
|
"eval_loss": 0.1101519763469696, |
|
"eval_precision": 0.8958374432826988, |
|
"eval_recall": 0.9067492012779552, |
|
"eval_runtime": 25.36, |
|
"eval_samples_per_second": 128.154, |
|
"eval_steps_per_second": 16.049, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"learning_rate": 2.013422818791946e-05, |
|
"loss": 0.0023, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"eval_accuracy_score": 0.9834626935527907, |
|
"eval_f1": 0.8975216365066876, |
|
"eval_loss": 0.10789068043231964, |
|
"eval_precision": 0.8843023255813953, |
|
"eval_recall": 0.9111421725239617, |
|
"eval_runtime": 25.3261, |
|
"eval_samples_per_second": 128.326, |
|
"eval_steps_per_second": 16.07, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"eval_accuracy_score": 0.9833456843090604, |
|
"eval_f1": 0.900358708648864, |
|
"eval_loss": 0.11746969074010849, |
|
"eval_precision": 0.8985680190930787, |
|
"eval_recall": 0.9021565495207667, |
|
"eval_runtime": 25.4056, |
|
"eval_samples_per_second": 127.924, |
|
"eval_steps_per_second": 16.02, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 21.41, |
|
"eval_accuracy_score": 0.9833651858496821, |
|
"eval_f1": 0.8977183320220299, |
|
"eval_loss": 0.1041969433426857, |
|
"eval_precision": 0.8844961240310077, |
|
"eval_recall": 0.9113418530351438, |
|
"eval_runtime": 25.4178, |
|
"eval_samples_per_second": 127.863, |
|
"eval_steps_per_second": 16.012, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"learning_rate": 1.8456375838926178e-05, |
|
"loss": 0.0022, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"eval_accuracy_score": 0.9844572721244979, |
|
"eval_f1": 0.905930267209695, |
|
"eval_loss": 0.10449470579624176, |
|
"eval_precision": 0.901363905910259, |
|
"eval_recall": 0.9105431309904153, |
|
"eval_runtime": 25.3884, |
|
"eval_samples_per_second": 128.011, |
|
"eval_steps_per_second": 16.031, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 22.32, |
|
"eval_accuracy_score": 0.9839502320683334, |
|
"eval_f1": 0.9034056960764787, |
|
"eval_loss": 0.10690420866012573, |
|
"eval_precision": 0.901072705601907, |
|
"eval_recall": 0.9057507987220448, |
|
"eval_runtime": 25.4089, |
|
"eval_samples_per_second": 127.908, |
|
"eval_steps_per_second": 16.018, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"learning_rate": 1.6778523489932888e-05, |
|
"loss": 0.0019, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"eval_accuracy_score": 0.9835992043371427, |
|
"eval_f1": 0.9027144838517931, |
|
"eval_loss": 0.10763387382030487, |
|
"eval_precision": 0.8957923712151002, |
|
"eval_recall": 0.9097444089456869, |
|
"eval_runtime": 25.3298, |
|
"eval_samples_per_second": 128.307, |
|
"eval_steps_per_second": 16.068, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"eval_accuracy_score": 0.9846522875307149, |
|
"eval_f1": 0.9070299294024062, |
|
"eval_loss": 0.10220099985599518, |
|
"eval_precision": 0.9033471974648445, |
|
"eval_recall": 0.9107428115015974, |
|
"eval_runtime": 25.3799, |
|
"eval_samples_per_second": 128.054, |
|
"eval_steps_per_second": 16.036, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 23.69, |
|
"eval_accuracy_score": 0.9840282382308202, |
|
"eval_f1": 0.9080059671805072, |
|
"eval_loss": 0.11143822968006134, |
|
"eval_precision": 0.9044977214186646, |
|
"eval_recall": 0.9115415335463258, |
|
"eval_runtime": 25.375, |
|
"eval_samples_per_second": 128.079, |
|
"eval_steps_per_second": 16.039, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"learning_rate": 1.51006711409396e-05, |
|
"loss": 0.0013, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 24.15, |
|
"eval_accuracy_score": 0.9839502320683334, |
|
"eval_f1": 0.9035462401907223, |
|
"eval_loss": 0.11336545646190643, |
|
"eval_precision": 0.8989918956315477, |
|
"eval_recall": 0.90814696485623, |
|
"eval_runtime": 25.4002, |
|
"eval_samples_per_second": 127.952, |
|
"eval_steps_per_second": 16.024, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"eval_accuracy_score": 0.9843012597995242, |
|
"eval_f1": 0.9057239057239057, |
|
"eval_loss": 0.10835720598697662, |
|
"eval_precision": 0.8984282907662082, |
|
"eval_recall": 0.9131389776357828, |
|
"eval_runtime": 25.3998, |
|
"eval_samples_per_second": 127.954, |
|
"eval_steps_per_second": 16.024, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 25.06, |
|
"learning_rate": 1.3422818791946309e-05, |
|
"loss": 0.0009, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 25.06, |
|
"eval_accuracy_score": 0.984379265962011, |
|
"eval_f1": 0.9084597837945055, |
|
"eval_loss": 0.10998024046421051, |
|
"eval_precision": 0.9024630541871921, |
|
"eval_recall": 0.9145367412140575, |
|
"eval_runtime": 25.3705, |
|
"eval_samples_per_second": 128.102, |
|
"eval_steps_per_second": 16.042, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 25.51, |
|
"eval_accuracy_score": 0.9838137212839815, |
|
"eval_f1": 0.9037773359840955, |
|
"eval_loss": 0.11049986630678177, |
|
"eval_precision": 0.8998416468725258, |
|
"eval_recall": 0.9077476038338658, |
|
"eval_runtime": 25.3707, |
|
"eval_samples_per_second": 128.1, |
|
"eval_steps_per_second": 16.042, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"eval_accuracy_score": 0.9844767736651195, |
|
"eval_f1": 0.9064790964929661, |
|
"eval_loss": 0.10651768743991852, |
|
"eval_precision": 0.8995281163979552, |
|
"eval_recall": 0.9135383386581469, |
|
"eval_runtime": 25.4896, |
|
"eval_samples_per_second": 127.503, |
|
"eval_steps_per_second": 15.967, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"learning_rate": 1.174496644295302e-05, |
|
"loss": 0.0012, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"eval_accuracy_score": 0.9842817582589024, |
|
"eval_f1": 0.9092719848620655, |
|
"eval_loss": 0.1079045906662941, |
|
"eval_precision": 0.9070137095171865, |
|
"eval_recall": 0.9115415335463258, |
|
"eval_runtime": 25.3954, |
|
"eval_samples_per_second": 127.976, |
|
"eval_steps_per_second": 16.027, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"eval_accuracy_score": 0.984184250555794, |
|
"eval_f1": 0.9041501976284585, |
|
"eval_loss": 0.10910345613956451, |
|
"eval_precision": 0.8949530516431925, |
|
"eval_recall": 0.9135383386581469, |
|
"eval_runtime": 25.3926, |
|
"eval_samples_per_second": 127.99, |
|
"eval_steps_per_second": 16.028, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 1.006711409395973e-05, |
|
"loss": 0.0007, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"eval_accuracy_score": 0.9837942197433597, |
|
"eval_f1": 0.9048899512000796, |
|
"eval_loss": 0.11004339158535004, |
|
"eval_precision": 0.9026425591098748, |
|
"eval_recall": 0.9071485623003195, |
|
"eval_runtime": 25.4253, |
|
"eval_samples_per_second": 127.826, |
|
"eval_steps_per_second": 16.008, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 27.79, |
|
"eval_accuracy_score": 0.9841452474745505, |
|
"eval_f1": 0.9081166898194086, |
|
"eval_loss": 0.10656478255987167, |
|
"eval_precision": 0.9025641025641026, |
|
"eval_recall": 0.9137380191693291, |
|
"eval_runtime": 25.3771, |
|
"eval_samples_per_second": 128.068, |
|
"eval_steps_per_second": 16.038, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 28.25, |
|
"eval_accuracy_score": 0.9843597644213893, |
|
"eval_f1": 0.9104314038059181, |
|
"eval_loss": 0.10543405264616013, |
|
"eval_precision": 0.9085305229667926, |
|
"eval_recall": 0.9123402555910544, |
|
"eval_runtime": 25.302, |
|
"eval_samples_per_second": 128.448, |
|
"eval_steps_per_second": 16.086, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 28.47, |
|
"learning_rate": 8.389261744966444e-06, |
|
"loss": 0.0008, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"eval_accuracy_score": 0.9839892351495768, |
|
"eval_f1": 0.9044737625235593, |
|
"eval_loss": 0.10808327049016953, |
|
"eval_precision": 0.8986792824758526, |
|
"eval_recall": 0.9103434504792333, |
|
"eval_runtime": 25.3718, |
|
"eval_samples_per_second": 128.095, |
|
"eval_steps_per_second": 16.041, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"eval_accuracy_score": 0.9845937829088498, |
|
"eval_f1": 0.9098848749503771, |
|
"eval_loss": 0.10423844307661057, |
|
"eval_precision": 0.904498816101026, |
|
"eval_recall": 0.9153354632587859, |
|
"eval_runtime": 25.3457, |
|
"eval_samples_per_second": 128.227, |
|
"eval_steps_per_second": 16.058, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 29.61, |
|
"learning_rate": 6.7114093959731546e-06, |
|
"loss": 0.0007, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 29.61, |
|
"eval_accuracy_score": 0.9845742813682281, |
|
"eval_f1": 0.9097922848664688, |
|
"eval_loss": 0.10766186565160751, |
|
"eval_precision": 0.9014112112896904, |
|
"eval_recall": 0.9183306709265175, |
|
"eval_runtime": 25.3084, |
|
"eval_samples_per_second": 128.416, |
|
"eval_steps_per_second": 16.082, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 30.07, |
|
"eval_accuracy_score": 0.9844962752057412, |
|
"eval_f1": 0.9077182874739247, |
|
"eval_loss": 0.10705368965864182, |
|
"eval_precision": 0.9031429136192923, |
|
"eval_recall": 0.9123402555910544, |
|
"eval_runtime": 25.4717, |
|
"eval_samples_per_second": 127.593, |
|
"eval_steps_per_second": 15.979, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 30.52, |
|
"eval_accuracy_score": 0.9840087366901985, |
|
"eval_f1": 0.9052005943536403, |
|
"eval_loss": 0.10472096502780914, |
|
"eval_precision": 0.8981718104973462, |
|
"eval_recall": 0.9123402555910544, |
|
"eval_runtime": 25.4157, |
|
"eval_samples_per_second": 127.874, |
|
"eval_steps_per_second": 16.014, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"learning_rate": 5.033557046979865e-06, |
|
"loss": 0.0006, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"eval_accuracy_score": 0.9845937829088498, |
|
"eval_f1": 0.9085492999702115, |
|
"eval_loss": 0.10349733382463455, |
|
"eval_precision": 0.9036144578313253, |
|
"eval_recall": 0.9135383386581469, |
|
"eval_runtime": 25.5031, |
|
"eval_samples_per_second": 127.435, |
|
"eval_steps_per_second": 15.959, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 31.44, |
|
"eval_accuracy_score": 0.9849058075587972, |
|
"eval_f1": 0.908856746582128, |
|
"eval_loss": 0.10392281413078308, |
|
"eval_precision": 0.9018875344081794, |
|
"eval_recall": 0.9159345047923323, |
|
"eval_runtime": 25.3529, |
|
"eval_samples_per_second": 128.191, |
|
"eval_steps_per_second": 16.053, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 31.89, |
|
"learning_rate": 3.3557046979865773e-06, |
|
"loss": 0.0005, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 31.89, |
|
"eval_accuracy_score": 0.9849253090994189, |
|
"eval_f1": 0.909863609408974, |
|
"eval_loss": 0.10778042674064636, |
|
"eval_precision": 0.9007827788649706, |
|
"eval_recall": 0.919129392971246, |
|
"eval_runtime": 25.3322, |
|
"eval_samples_per_second": 128.295, |
|
"eval_steps_per_second": 16.066, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 32.35, |
|
"eval_accuracy_score": 0.9845742813682281, |
|
"eval_f1": 0.9082278481012659, |
|
"eval_loss": 0.10826905816793442, |
|
"eval_precision": 0.8996865203761756, |
|
"eval_recall": 0.9169329073482428, |
|
"eval_runtime": 25.3345, |
|
"eval_samples_per_second": 128.283, |
|
"eval_steps_per_second": 16.065, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"eval_accuracy_score": 0.9846522875307149, |
|
"eval_f1": 0.9092885375494072, |
|
"eval_loss": 0.10940501093864441, |
|
"eval_precision": 0.900039123630673, |
|
"eval_recall": 0.9187300319488818, |
|
"eval_runtime": 25.4019, |
|
"eval_samples_per_second": 127.943, |
|
"eval_steps_per_second": 16.022, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 33.03, |
|
"learning_rate": 1.6778523489932886e-06, |
|
"loss": 0.0003, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 33.26, |
|
"eval_accuracy_score": 0.9846912906119584, |
|
"eval_f1": 0.9091268664095719, |
|
"eval_loss": 0.10913100093603134, |
|
"eval_precision": 0.9004897159647405, |
|
"eval_recall": 0.9179313099041534, |
|
"eval_runtime": 25.3619, |
|
"eval_samples_per_second": 128.145, |
|
"eval_steps_per_second": 16.048, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 33.71, |
|
"eval_accuracy_score": 0.9848278013963103, |
|
"eval_f1": 0.9108891306497873, |
|
"eval_loss": 0.10917215794324875, |
|
"eval_precision": 0.9024103468547913, |
|
"eval_recall": 0.9195287539936102, |
|
"eval_runtime": 25.4358, |
|
"eval_samples_per_second": 127.773, |
|
"eval_steps_per_second": 16.001, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 34.17, |
|
"learning_rate": 0.0, |
|
"loss": 0.0003, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 34.17, |
|
"eval_accuracy_score": 0.9847692967744451, |
|
"eval_f1": 0.9102957175353575, |
|
"eval_loss": 0.1090984046459198, |
|
"eval_precision": 0.9018224573780129, |
|
"eval_recall": 0.9189297124600639, |
|
"eval_runtime": 25.2915, |
|
"eval_samples_per_second": 128.501, |
|
"eval_steps_per_second": 16.092, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 34.17, |
|
"step": 15000, |
|
"total_flos": 7.879039709550797e+16, |
|
"train_loss": 0.023325540216763814, |
|
"train_runtime": 9228.2252, |
|
"train_samples_per_second": 52.014, |
|
"train_steps_per_second": 1.625 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 35, |
|
"total_flos": 7.879039709550797e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|