|
{ |
|
"best_metric": 0.12050338089466095, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-large-2024_09_05-batch-size32_epochs150_freeze/checkpoint-22386", |
|
"epoch": 92.0, |
|
"eval_steps": 500, |
|
"global_step": 25116, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.22314622314622315, |
|
"eval_f1_macro": 0.5430112866470752, |
|
"eval_f1_micro": 0.7516596896274684, |
|
"eval_loss": 0.16899551451206207, |
|
"eval_roc_auc": 0.8384250127967259, |
|
"eval_runtime": 514.8139, |
|
"eval_samples_per_second": 5.606, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.001, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 0.9695320725440979, |
|
"learning_rate": 0.001, |
|
"loss": 0.2719, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.24012474012474014, |
|
"eval_f1_macro": 0.5721428312627432, |
|
"eval_f1_micro": 0.765669700910273, |
|
"eval_loss": 0.153842031955719, |
|
"eval_roc_auc": 0.8396070197954885, |
|
"eval_runtime": 520.9151, |
|
"eval_samples_per_second": 5.54, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.23458073458073458, |
|
"eval_f1_macro": 0.6137585525531024, |
|
"eval_f1_micro": 0.7772688719253604, |
|
"eval_loss": 0.14828726649284363, |
|
"eval_roc_auc": 0.851613165443153, |
|
"eval_runtime": 519.0164, |
|
"eval_samples_per_second": 5.561, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 0.24952150881290436, |
|
"learning_rate": 0.001, |
|
"loss": 0.1694, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.2494802494802495, |
|
"eval_f1_macro": 0.6224730910908008, |
|
"eval_f1_micro": 0.7722737615963591, |
|
"eval_loss": 0.1479637324810028, |
|
"eval_roc_auc": 0.8406560025496872, |
|
"eval_runtime": 512.3298, |
|
"eval_samples_per_second": 5.633, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.2494802494802495, |
|
"eval_f1_macro": 0.6302307709949958, |
|
"eval_f1_micro": 0.779738930569409, |
|
"eval_loss": 0.14575305581092834, |
|
"eval_roc_auc": 0.8469713476749664, |
|
"eval_runtime": 509.5723, |
|
"eval_samples_per_second": 5.664, |
|
"eval_steps_per_second": 0.179, |
|
"learning_rate": 0.001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.17697261273860931, |
|
"learning_rate": 0.001, |
|
"loss": 0.1625, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.2480942480942481, |
|
"eval_f1_macro": 0.6092591780781843, |
|
"eval_f1_micro": 0.7798061948433986, |
|
"eval_loss": 0.14499613642692566, |
|
"eval_roc_auc": 0.8476621294180898, |
|
"eval_runtime": 511.5766, |
|
"eval_samples_per_second": 5.641, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.001, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.2525987525987526, |
|
"eval_f1_macro": 0.624806622732382, |
|
"eval_f1_micro": 0.7767369242779079, |
|
"eval_loss": 0.1474585235118866, |
|
"eval_roc_auc": 0.8453623673245133, |
|
"eval_runtime": 510.6821, |
|
"eval_samples_per_second": 5.651, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.001, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 0.24790136516094208, |
|
"learning_rate": 0.001, |
|
"loss": 0.1592, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.25744975744975745, |
|
"eval_f1_macro": 0.6249401475720361, |
|
"eval_f1_micro": 0.7803859753759638, |
|
"eval_loss": 0.14568069577217102, |
|
"eval_roc_auc": 0.8520784887308331, |
|
"eval_runtime": 514.1071, |
|
"eval_samples_per_second": 5.614, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.25744975744975745, |
|
"eval_f1_macro": 0.652642904607388, |
|
"eval_f1_micro": 0.7868685150535805, |
|
"eval_loss": 0.14169421792030334, |
|
"eval_roc_auc": 0.8560913219420118, |
|
"eval_runtime": 518.322, |
|
"eval_samples_per_second": 5.568, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.001, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 9.157509157509157, |
|
"grad_norm": 0.2022881656885147, |
|
"learning_rate": 0.001, |
|
"loss": 0.157, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.25467775467775466, |
|
"eval_f1_macro": 0.6289931868767601, |
|
"eval_f1_micro": 0.7757335098168984, |
|
"eval_loss": 0.1436299830675125, |
|
"eval_roc_auc": 0.8403493908543921, |
|
"eval_runtime": 514.5559, |
|
"eval_samples_per_second": 5.609, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 0.16650938987731934, |
|
"learning_rate": 0.001, |
|
"loss": 0.1563, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.26403326403326405, |
|
"eval_f1_macro": 0.6447870111639475, |
|
"eval_f1_micro": 0.7886988341417751, |
|
"eval_loss": 0.1428152322769165, |
|
"eval_roc_auc": 0.8569209092596786, |
|
"eval_runtime": 515.3716, |
|
"eval_samples_per_second": 5.6, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.001, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.25814275814275817, |
|
"eval_f1_macro": 0.6493205009564239, |
|
"eval_f1_micro": 0.7904845227679873, |
|
"eval_loss": 0.1438700556755066, |
|
"eval_roc_auc": 0.8637698939454586, |
|
"eval_runtime": 512.0567, |
|
"eval_samples_per_second": 5.636, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.001, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.163461372256279, |
|
"learning_rate": 0.001, |
|
"loss": 0.1558, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2713097713097713, |
|
"eval_f1_macro": 0.6561811626743236, |
|
"eval_f1_micro": 0.7906956746065871, |
|
"eval_loss": 0.13913600146770477, |
|
"eval_roc_auc": 0.8551388511813229, |
|
"eval_runtime": 511.5092, |
|
"eval_samples_per_second": 5.642, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.001, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.2643797643797644, |
|
"eval_f1_macro": 0.6337626365639194, |
|
"eval_f1_micro": 0.783810807286006, |
|
"eval_loss": 0.14094506204128265, |
|
"eval_roc_auc": 0.8484895839481307, |
|
"eval_runtime": 513.4311, |
|
"eval_samples_per_second": 5.621, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.001, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 14.652014652014651, |
|
"grad_norm": 0.17725127935409546, |
|
"learning_rate": 0.001, |
|
"loss": 0.1543, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.2577962577962578, |
|
"eval_f1_macro": 0.6463067634895379, |
|
"eval_f1_micro": 0.7907172995780591, |
|
"eval_loss": 0.1396123319864273, |
|
"eval_roc_auc": 0.8603407738558333, |
|
"eval_runtime": 520.2063, |
|
"eval_samples_per_second": 5.548, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.001, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.2654192654192654, |
|
"eval_f1_macro": 0.6593840515969085, |
|
"eval_f1_micro": 0.7913274487959551, |
|
"eval_loss": 0.13904806971549988, |
|
"eval_roc_auc": 0.856418510343081, |
|
"eval_runtime": 522.3782, |
|
"eval_samples_per_second": 5.525, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 16.483516483516482, |
|
"grad_norm": 0.16505596041679382, |
|
"learning_rate": 0.001, |
|
"loss": 0.1535, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.2564102564102564, |
|
"eval_f1_macro": 0.6585824628325464, |
|
"eval_f1_micro": 0.7939832128313804, |
|
"eval_loss": 0.1418265849351883, |
|
"eval_roc_auc": 0.8664525383660324, |
|
"eval_runtime": 520.8828, |
|
"eval_samples_per_second": 5.541, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.001, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.26576576576576577, |
|
"eval_f1_macro": 0.6560187518750095, |
|
"eval_f1_micro": 0.7957187827911858, |
|
"eval_loss": 0.14155420660972595, |
|
"eval_roc_auc": 0.864558649781785, |
|
"eval_runtime": 521.9656, |
|
"eval_samples_per_second": 5.529, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.001, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 18.315018315018314, |
|
"grad_norm": 0.176731139421463, |
|
"learning_rate": 0.001, |
|
"loss": 0.1549, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.262993762993763, |
|
"eval_f1_macro": 0.6524018082903621, |
|
"eval_f1_micro": 0.7885625699767461, |
|
"eval_loss": 0.14027266204357147, |
|
"eval_roc_auc": 0.8535729424099051, |
|
"eval_runtime": 525.594, |
|
"eval_samples_per_second": 5.491, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.001, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.26126126126126126, |
|
"eval_f1_macro": 0.6558190248610255, |
|
"eval_f1_micro": 0.7910696719558615, |
|
"eval_loss": 0.14759798347949982, |
|
"eval_roc_auc": 0.8567849608157283, |
|
"eval_runtime": 533.1376, |
|
"eval_samples_per_second": 5.413, |
|
"eval_steps_per_second": 0.171, |
|
"learning_rate": 0.001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.146520146520146, |
|
"grad_norm": 0.15767891705036163, |
|
"learning_rate": 0.001, |
|
"loss": 0.154, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.26576576576576577, |
|
"eval_f1_macro": 0.6397027546064713, |
|
"eval_f1_micro": 0.7879767016708474, |
|
"eval_loss": 0.14285211265087128, |
|
"eval_roc_auc": 0.8567511447301636, |
|
"eval_runtime": 527.0011, |
|
"eval_samples_per_second": 5.476, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.001, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 21.978021978021978, |
|
"grad_norm": 0.18300685286521912, |
|
"learning_rate": 0.001, |
|
"loss": 0.1529, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.26126126126126126, |
|
"eval_f1_macro": 0.650810186340724, |
|
"eval_f1_micro": 0.7936799099512236, |
|
"eval_loss": 0.141402930021286, |
|
"eval_roc_auc": 0.8653510005054305, |
|
"eval_runtime": 525.9127, |
|
"eval_samples_per_second": 5.488, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.001, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.26853776853776856, |
|
"eval_f1_macro": 0.6618136826297922, |
|
"eval_f1_micro": 0.7975794766896787, |
|
"eval_loss": 0.1415141373872757, |
|
"eval_roc_auc": 0.8613092204030781, |
|
"eval_runtime": 530.5247, |
|
"eval_samples_per_second": 5.44, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 0.0001, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.16848017275333405, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1449, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.27893277893277896, |
|
"eval_f1_macro": 0.6750686264509598, |
|
"eval_f1_micro": 0.8044778018063861, |
|
"eval_loss": 0.13230843842029572, |
|
"eval_roc_auc": 0.8664561198395929, |
|
"eval_runtime": 521.5756, |
|
"eval_samples_per_second": 5.533, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.27927927927927926, |
|
"eval_f1_macro": 0.6724022117445357, |
|
"eval_f1_micro": 0.8044072500946213, |
|
"eval_loss": 0.13101588189601898, |
|
"eval_roc_auc": 0.868781233937024, |
|
"eval_runtime": 523.3306, |
|
"eval_samples_per_second": 5.515, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 25.641025641025642, |
|
"grad_norm": 0.16336454451084137, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1416, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.28205128205128205, |
|
"eval_f1_macro": 0.6689442300740391, |
|
"eval_f1_micro": 0.8035965398218775, |
|
"eval_loss": 0.13268393278121948, |
|
"eval_roc_auc": 0.8645798435204571, |
|
"eval_runtime": 532.8406, |
|
"eval_samples_per_second": 5.416, |
|
"eval_steps_per_second": 0.171, |
|
"learning_rate": 0.0001, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.2817047817047817, |
|
"eval_f1_macro": 0.679681812643572, |
|
"eval_f1_micro": 0.8068647969861867, |
|
"eval_loss": 0.1317097693681717, |
|
"eval_roc_auc": 0.8714747032608311, |
|
"eval_runtime": 527.4278, |
|
"eval_samples_per_second": 5.472, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 27.47252747252747, |
|
"grad_norm": 0.1572931855916977, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1391, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.27754677754677753, |
|
"eval_f1_macro": 0.6818462300001074, |
|
"eval_f1_micro": 0.8072126727334008, |
|
"eval_loss": 0.12880520522594452, |
|
"eval_roc_auc": 0.8697994857701482, |
|
"eval_runtime": 536.9046, |
|
"eval_samples_per_second": 5.375, |
|
"eval_steps_per_second": 0.169, |
|
"learning_rate": 0.0001, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.2844767844767845, |
|
"eval_f1_macro": 0.6807929806344717, |
|
"eval_f1_micro": 0.8038088702067427, |
|
"eval_loss": 0.12942521274089813, |
|
"eval_roc_auc": 0.8628519636133017, |
|
"eval_runtime": 520.5065, |
|
"eval_samples_per_second": 5.545, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 7917 |
|
}, |
|
{ |
|
"epoch": 29.304029304029303, |
|
"grad_norm": 0.19199338555335999, |
|
"learning_rate": 0.0001, |
|
"loss": 0.138, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.28586278586278585, |
|
"eval_f1_macro": 0.6825529208005033, |
|
"eval_f1_micro": 0.8077149835761811, |
|
"eval_loss": 0.12943296134471893, |
|
"eval_roc_auc": 0.8701959964759374, |
|
"eval_runtime": 543.5755, |
|
"eval_samples_per_second": 5.309, |
|
"eval_steps_per_second": 0.167, |
|
"learning_rate": 0.0001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.28794178794178793, |
|
"eval_f1_macro": 0.6779122940127521, |
|
"eval_f1_micro": 0.8073808915025994, |
|
"eval_loss": 0.12738928198814392, |
|
"eval_roc_auc": 0.8666172459085354, |
|
"eval_runtime": 521.4164, |
|
"eval_samples_per_second": 5.535, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 31.135531135531135, |
|
"grad_norm": 0.1997932642698288, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1364, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.2882882882882883, |
|
"eval_f1_macro": 0.6868638344898197, |
|
"eval_f1_micro": 0.8104185890445432, |
|
"eval_loss": 0.12775012850761414, |
|
"eval_roc_auc": 0.8728485806633693, |
|
"eval_runtime": 519.8308, |
|
"eval_samples_per_second": 5.552, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 32.967032967032964, |
|
"grad_norm": 0.19476589560508728, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1359, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.2869022869022869, |
|
"eval_f1_macro": 0.6810807224403135, |
|
"eval_f1_micro": 0.8077248140635565, |
|
"eval_loss": 0.12765593826770782, |
|
"eval_roc_auc": 0.8692062891212271, |
|
"eval_runtime": 514.7142, |
|
"eval_samples_per_second": 5.607, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.2882882882882883, |
|
"eval_f1_macro": 0.687361527737602, |
|
"eval_f1_micro": 0.8108837797932926, |
|
"eval_loss": 0.12660712003707886, |
|
"eval_roc_auc": 0.8714320206807965, |
|
"eval_runtime": 514.9645, |
|
"eval_samples_per_second": 5.604, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 9282 |
|
}, |
|
{ |
|
"epoch": 34.798534798534796, |
|
"grad_norm": 0.2034957855939865, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1341, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.29036729036729036, |
|
"eval_f1_macro": 0.688483181989703, |
|
"eval_f1_micro": 0.8103963941193815, |
|
"eval_loss": 0.1262102574110031, |
|
"eval_roc_auc": 0.8715800817488106, |
|
"eval_runtime": 525.0872, |
|
"eval_samples_per_second": 5.496, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.28274428274428276, |
|
"eval_f1_macro": 0.6876394944988364, |
|
"eval_f1_micro": 0.8070400273399119, |
|
"eval_loss": 0.12687553465366364, |
|
"eval_roc_auc": 0.8657418371913091, |
|
"eval_runtime": 513.0757, |
|
"eval_samples_per_second": 5.625, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 36.63003663003663, |
|
"grad_norm": 0.20557202398777008, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1339, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.28655578655578656, |
|
"eval_f1_macro": 0.6833930255395054, |
|
"eval_f1_micro": 0.8081597960050999, |
|
"eval_loss": 0.12656189501285553, |
|
"eval_roc_auc": 0.8678163688633396, |
|
"eval_runtime": 515.3436, |
|
"eval_samples_per_second": 5.6, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 10101 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.2955647955647956, |
|
"eval_f1_macro": 0.6936175483283518, |
|
"eval_f1_micro": 0.8106371284826448, |
|
"eval_loss": 0.12547720968723297, |
|
"eval_roc_auc": 0.8706625538294134, |
|
"eval_runtime": 512.1358, |
|
"eval_samples_per_second": 5.635, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.0001, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 0.2112371176481247, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1307, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.2927927927927928, |
|
"eval_f1_macro": 0.6985657340894045, |
|
"eval_f1_micro": 0.8141880626875626, |
|
"eval_loss": 0.12485096603631973, |
|
"eval_roc_auc": 0.8767653445350737, |
|
"eval_runtime": 512.6109, |
|
"eval_samples_per_second": 5.63, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.0001, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.2934857934857935, |
|
"eval_f1_macro": 0.6989554260935754, |
|
"eval_f1_micro": 0.8138017044273539, |
|
"eval_loss": 0.1257668137550354, |
|
"eval_roc_auc": 0.8773247787534647, |
|
"eval_runtime": 513.8833, |
|
"eval_samples_per_second": 5.616, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 40.29304029304029, |
|
"grad_norm": 0.23032954335212708, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1317, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.29244629244629244, |
|
"eval_f1_macro": 0.6923923602014324, |
|
"eval_f1_micro": 0.8101351925856646, |
|
"eval_loss": 0.12528541684150696, |
|
"eval_roc_auc": 0.8687915491174283, |
|
"eval_runtime": 513.0005, |
|
"eval_samples_per_second": 5.626, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 11193 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.3004158004158004, |
|
"eval_f1_macro": 0.6970236383039276, |
|
"eval_f1_micro": 0.8138018093835474, |
|
"eval_loss": 0.12443084269762039, |
|
"eval_roc_auc": 0.8737649281720051, |
|
"eval_runtime": 525.5315, |
|
"eval_samples_per_second": 5.492, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 42.124542124542124, |
|
"grad_norm": 0.23487386107444763, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1308, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.2948717948717949, |
|
"eval_f1_macro": 0.6956334056896907, |
|
"eval_f1_micro": 0.8131470414948238, |
|
"eval_loss": 0.12451612949371338, |
|
"eval_roc_auc": 0.8733690344991142, |
|
"eval_runtime": 514.3778, |
|
"eval_samples_per_second": 5.611, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 11739 |
|
}, |
|
{ |
|
"epoch": 43.956043956043956, |
|
"grad_norm": 0.25621357560157776, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1307, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.2966042966042966, |
|
"eval_f1_macro": 0.6915470420512126, |
|
"eval_f1_micro": 0.812950847173293, |
|
"eval_loss": 0.12501148879528046, |
|
"eval_roc_auc": 0.8742664283667729, |
|
"eval_runtime": 519.3764, |
|
"eval_samples_per_second": 5.557, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 12012 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.29625779625779625, |
|
"eval_f1_macro": 0.7050548840380568, |
|
"eval_f1_micro": 0.8136846971798428, |
|
"eval_loss": 0.12397606670856476, |
|
"eval_roc_auc": 0.8740443367647517, |
|
"eval_runtime": 515.8997, |
|
"eval_samples_per_second": 5.594, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 45.78754578754579, |
|
"grad_norm": 0.22914335131645203, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1295, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.29764379764379767, |
|
"eval_f1_macro": 0.6987723620069867, |
|
"eval_f1_micro": 0.8130628734954971, |
|
"eval_loss": 0.12409698963165283, |
|
"eval_roc_auc": 0.8733228777555885, |
|
"eval_runtime": 516.6269, |
|
"eval_samples_per_second": 5.586, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.2955647955647956, |
|
"eval_f1_macro": 0.6957628076563835, |
|
"eval_f1_micro": 0.811911298838437, |
|
"eval_loss": 0.12429661303758621, |
|
"eval_roc_auc": 0.8716271908692008, |
|
"eval_runtime": 518.0917, |
|
"eval_samples_per_second": 5.57, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 12831 |
|
}, |
|
{ |
|
"epoch": 47.61904761904762, |
|
"grad_norm": 0.25639113783836365, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1293, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.2955647955647956, |
|
"eval_f1_macro": 0.6990296569974817, |
|
"eval_f1_micro": 0.8135280295401142, |
|
"eval_loss": 0.12393072247505188, |
|
"eval_roc_auc": 0.874436809929186, |
|
"eval_runtime": 517.4634, |
|
"eval_samples_per_second": 5.577, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.29972279972279975, |
|
"eval_f1_macro": 0.7007060102949784, |
|
"eval_f1_micro": 0.8152993625265614, |
|
"eval_loss": 0.1242954283952713, |
|
"eval_roc_auc": 0.8774914581184896, |
|
"eval_runtime": 511.3749, |
|
"eval_samples_per_second": 5.644, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.0001, |
|
"step": 13377 |
|
}, |
|
{ |
|
"epoch": 49.45054945054945, |
|
"grad_norm": 0.27197974920272827, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1274, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.6999734070385492, |
|
"eval_f1_micro": 0.8151919866444074, |
|
"eval_loss": 0.12405084818601608, |
|
"eval_roc_auc": 0.8769273693258459, |
|
"eval_runtime": 509.3276, |
|
"eval_samples_per_second": 5.666, |
|
"eval_steps_per_second": 0.179, |
|
"learning_rate": 0.0001, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.3011088011088011, |
|
"eval_f1_macro": 0.7055935576453343, |
|
"eval_f1_micro": 0.8153039745759215, |
|
"eval_loss": 0.12483017891645432, |
|
"eval_roc_auc": 0.8803007418345086, |
|
"eval_runtime": 511.0056, |
|
"eval_samples_per_second": 5.648, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.0001, |
|
"step": 13923 |
|
}, |
|
{ |
|
"epoch": 51.282051282051285, |
|
"grad_norm": 0.23091430962085724, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1271, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7035566403965832, |
|
"eval_f1_micro": 0.8157241959217996, |
|
"eval_loss": 0.12426182627677917, |
|
"eval_roc_auc": 0.8750656737623661, |
|
"eval_runtime": 511.1647, |
|
"eval_samples_per_second": 5.646, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.0001, |
|
"step": 14196 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.7031528349086803, |
|
"eval_f1_micro": 0.8152648882600192, |
|
"eval_loss": 0.12408608943223953, |
|
"eval_roc_auc": 0.8778170234547618, |
|
"eval_runtime": 520.4, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 14469 |
|
}, |
|
{ |
|
"epoch": 53.11355311355312, |
|
"grad_norm": 0.23177389800548553, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1275, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.7067666695453366, |
|
"eval_f1_micro": 0.8152251458307105, |
|
"eval_loss": 0.12344320118427277, |
|
"eval_roc_auc": 0.8753333050750151, |
|
"eval_runtime": 522.8329, |
|
"eval_samples_per_second": 5.52, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 14742 |
|
}, |
|
{ |
|
"epoch": 54.94505494505494, |
|
"grad_norm": 0.3403611481189728, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1256, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_f1_macro": 0.7075536762185066, |
|
"eval_f1_micro": 0.8166332665330662, |
|
"eval_loss": 0.12307523190975189, |
|
"eval_roc_auc": 0.8776256091187804, |
|
"eval_runtime": 513.5394, |
|
"eval_samples_per_second": 5.62, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 15015 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.30665280665280664, |
|
"eval_f1_macro": 0.7087921855865761, |
|
"eval_f1_micro": 0.8189626693095475, |
|
"eval_loss": 0.12282071262598038, |
|
"eval_roc_auc": 0.8821854285803199, |
|
"eval_runtime": 519.2592, |
|
"eval_samples_per_second": 5.558, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 56.776556776556774, |
|
"grad_norm": 0.28649473190307617, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1258, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.306999306999307, |
|
"eval_f1_macro": 0.7079839879234633, |
|
"eval_f1_micro": 0.8160328019748128, |
|
"eval_loss": 0.12259934842586517, |
|
"eval_roc_auc": 0.8766650096203477, |
|
"eval_runtime": 523.8952, |
|
"eval_samples_per_second": 5.509, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 15561 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.7072503847729165, |
|
"eval_f1_micro": 0.8170145133631687, |
|
"eval_loss": 0.12334763258695602, |
|
"eval_roc_auc": 0.8773053153896588, |
|
"eval_runtime": 522.7463, |
|
"eval_samples_per_second": 5.521, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 15834 |
|
}, |
|
{ |
|
"epoch": 58.608058608058606, |
|
"grad_norm": 0.2677023112773895, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1258, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.713532815646164, |
|
"eval_f1_micro": 0.8172105834237543, |
|
"eval_loss": 0.12272054702043533, |
|
"eval_roc_auc": 0.8780682765680952, |
|
"eval_runtime": 524.8476, |
|
"eval_samples_per_second": 5.499, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 16107 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.7039801220819605, |
|
"eval_f1_micro": 0.8142579609764339, |
|
"eval_loss": 0.12334387749433517, |
|
"eval_roc_auc": 0.8729462194126062, |
|
"eval_runtime": 526.97, |
|
"eval_samples_per_second": 5.477, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 60.43956043956044, |
|
"grad_norm": 0.273879736661911, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1252, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.3042273042273042, |
|
"eval_f1_macro": 0.7120578542808926, |
|
"eval_f1_micro": 0.816814564846061, |
|
"eval_loss": 0.12339764833450317, |
|
"eval_roc_auc": 0.8783554248995846, |
|
"eval_runtime": 524.4656, |
|
"eval_samples_per_second": 5.503, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 16653 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7124854785684515, |
|
"eval_f1_micro": 0.8169309505831026, |
|
"eval_loss": 0.12234435975551605, |
|
"eval_roc_auc": 0.876382515863111, |
|
"eval_runtime": 518.389, |
|
"eval_samples_per_second": 5.567, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 62.27106227106227, |
|
"grad_norm": 0.2836596667766571, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1238, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.30353430353430355, |
|
"eval_f1_macro": 0.709030237195192, |
|
"eval_f1_micro": 0.8151443922095366, |
|
"eval_loss": 0.12311259657144547, |
|
"eval_roc_auc": 0.875227363209172, |
|
"eval_runtime": 523.1406, |
|
"eval_samples_per_second": 5.517, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 17199 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.30665280665280664, |
|
"eval_f1_macro": 0.7114197657112039, |
|
"eval_f1_micro": 0.8183222681531587, |
|
"eval_loss": 0.12282687425613403, |
|
"eval_roc_auc": 0.8785221042646094, |
|
"eval_runtime": 525.9879, |
|
"eval_samples_per_second": 5.487, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 64.1025641025641, |
|
"grad_norm": 0.327009916305542, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1247, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.30353430353430355, |
|
"eval_f1_macro": 0.715610525327271, |
|
"eval_f1_micro": 0.8185065204751224, |
|
"eval_loss": 0.12305620312690735, |
|
"eval_roc_auc": 0.8802214933483853, |
|
"eval_runtime": 527.6963, |
|
"eval_samples_per_second": 5.469, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 0.0001, |
|
"step": 17745 |
|
}, |
|
{ |
|
"epoch": 65.93406593406593, |
|
"grad_norm": 0.3439556360244751, |
|
"learning_rate": 0.0001, |
|
"loss": 0.123, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.7083957677770276, |
|
"eval_f1_micro": 0.8193021036471515, |
|
"eval_loss": 0.12252139300107956, |
|
"eval_roc_auc": 0.8809488409975973, |
|
"eval_runtime": 523.6027, |
|
"eval_samples_per_second": 5.512, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 18018 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.3031878031878032, |
|
"eval_f1_macro": 0.713563304331985, |
|
"eval_f1_micro": 0.8185542268382505, |
|
"eval_loss": 0.12215397506952286, |
|
"eval_roc_auc": 0.8813502879665707, |
|
"eval_runtime": 528.5406, |
|
"eval_samples_per_second": 5.46, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 0.0001, |
|
"step": 18291 |
|
}, |
|
{ |
|
"epoch": 67.76556776556777, |
|
"grad_norm": 0.3434881269931793, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1224, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.3090783090783091, |
|
"eval_f1_macro": 0.7169216330412181, |
|
"eval_f1_micro": 0.8201218248870841, |
|
"eval_loss": 0.12200037389993668, |
|
"eval_roc_auc": 0.8818022645643908, |
|
"eval_runtime": 525.6971, |
|
"eval_samples_per_second": 5.49, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 18564 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_f1_macro": 0.7165157275423649, |
|
"eval_f1_micro": 0.8171493231633209, |
|
"eval_loss": 0.12282921373844147, |
|
"eval_roc_auc": 0.8767867663076429, |
|
"eval_runtime": 539.1574, |
|
"eval_samples_per_second": 5.353, |
|
"eval_steps_per_second": 0.169, |
|
"learning_rate": 0.0001, |
|
"step": 18837 |
|
}, |
|
{ |
|
"epoch": 69.59706959706959, |
|
"grad_norm": 0.2773456275463104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1228, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.3042273042273042, |
|
"eval_f1_macro": 0.7130922408537738, |
|
"eval_f1_micro": 0.8176893032631977, |
|
"eval_loss": 0.12265007942914963, |
|
"eval_roc_auc": 0.8764658555456234, |
|
"eval_runtime": 532.0042, |
|
"eval_samples_per_second": 5.425, |
|
"eval_steps_per_second": 0.171, |
|
"learning_rate": 0.0001, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.7123118599173115, |
|
"eval_f1_micro": 0.8155257705805251, |
|
"eval_loss": 0.12318737804889679, |
|
"eval_roc_auc": 0.8733064995562728, |
|
"eval_runtime": 512.5227, |
|
"eval_samples_per_second": 5.631, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 0.0001, |
|
"step": 19383 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 0.32921841740608215, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1224, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.30561330561330563, |
|
"eval_f1_macro": 0.7181217472368024, |
|
"eval_f1_micro": 0.8177146438270315, |
|
"eval_loss": 0.12224896252155304, |
|
"eval_roc_auc": 0.8780131460200304, |
|
"eval_runtime": 526.8353, |
|
"eval_samples_per_second": 5.478, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 19656 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.7046690012290543, |
|
"eval_f1_micro": 0.8161570403926011, |
|
"eval_loss": 0.12214501202106476, |
|
"eval_roc_auc": 0.8759937448960649, |
|
"eval_runtime": 523.4325, |
|
"eval_samples_per_second": 5.514, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 19929 |
|
}, |
|
{ |
|
"epoch": 73.26007326007326, |
|
"grad_norm": 0.27500712871551514, |
|
"learning_rate": 0.0001, |
|
"loss": 0.122, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.2972972972972973, |
|
"eval_f1_macro": 0.7070482653980339, |
|
"eval_f1_micro": 0.8147835269271382, |
|
"eval_loss": 0.12297073751688004, |
|
"eval_roc_auc": 0.8731965201490751, |
|
"eval_runtime": 521.6748, |
|
"eval_samples_per_second": 5.532, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 20202 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7123584497861349, |
|
"eval_f1_micro": 0.8175831550689987, |
|
"eval_loss": 0.12141965329647064, |
|
"eval_roc_auc": 0.876778409536002, |
|
"eval_runtime": 521.9582, |
|
"eval_samples_per_second": 5.529, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 1e-05, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 75.0915750915751, |
|
"grad_norm": 0.34586936235427856, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1201, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.30665280665280664, |
|
"eval_f1_macro": 0.7265282519195887, |
|
"eval_f1_micro": 0.8212704324436167, |
|
"eval_loss": 0.12091591954231262, |
|
"eval_roc_auc": 0.8828403151052873, |
|
"eval_runtime": 515.687, |
|
"eval_samples_per_second": 5.596, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1e-05, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 0.3650946617126465, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1192, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.30734580734580735, |
|
"eval_f1_macro": 0.7249141687532618, |
|
"eval_f1_micro": 0.8221009885557243, |
|
"eval_loss": 0.12162773311138153, |
|
"eval_roc_auc": 0.88597146196019, |
|
"eval_runtime": 505.8066, |
|
"eval_samples_per_second": 5.706, |
|
"eval_steps_per_second": 0.18, |
|
"learning_rate": 1e-05, |
|
"step": 21021 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.30561330561330563, |
|
"eval_f1_macro": 0.7232913822219021, |
|
"eval_f1_micro": 0.821013443640124, |
|
"eval_loss": 0.12114103883504868, |
|
"eval_roc_auc": 0.8828214151193448, |
|
"eval_runtime": 515.423, |
|
"eval_samples_per_second": 5.599, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 1e-05, |
|
"step": 21294 |
|
}, |
|
{ |
|
"epoch": 78.75457875457876, |
|
"grad_norm": 0.3805921673774719, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1178, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.30561330561330563, |
|
"eval_f1_macro": 0.7157592534107864, |
|
"eval_f1_micro": 0.8181284095677717, |
|
"eval_loss": 0.1210767850279808, |
|
"eval_roc_auc": 0.8769422854254683, |
|
"eval_runtime": 524.7026, |
|
"eval_samples_per_second": 5.5, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 1e-05, |
|
"step": 21567 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.3090783090783091, |
|
"eval_f1_macro": 0.7196736600383237, |
|
"eval_f1_micro": 0.8200463116109824, |
|
"eval_loss": 0.12099559605121613, |
|
"eval_roc_auc": 0.8823936101146178, |
|
"eval_runtime": 518.5996, |
|
"eval_samples_per_second": 5.565, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 80.58608058608058, |
|
"grad_norm": 0.38496658205986023, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1178, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.31046431046431044, |
|
"eval_f1_macro": 0.7194056763702963, |
|
"eval_f1_micro": 0.8189727287937092, |
|
"eval_loss": 0.12053155153989792, |
|
"eval_roc_auc": 0.8783734261636972, |
|
"eval_runtime": 517.5249, |
|
"eval_samples_per_second": 5.577, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1e-05, |
|
"step": 22113 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.306999306999307, |
|
"eval_f1_macro": 0.7212694332008583, |
|
"eval_f1_micro": 0.8186875235267054, |
|
"eval_loss": 0.12050338089466095, |
|
"eval_roc_auc": 0.8782284502601733, |
|
"eval_runtime": 511.5713, |
|
"eval_samples_per_second": 5.641, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 1e-05, |
|
"step": 22386 |
|
}, |
|
{ |
|
"epoch": 82.41758241758242, |
|
"grad_norm": 0.29807013273239136, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1162, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7136069207682542, |
|
"eval_f1_micro": 0.817129142279675, |
|
"eval_loss": 0.12153622508049011, |
|
"eval_roc_auc": 0.8753921914755026, |
|
"eval_runtime": 514.5554, |
|
"eval_samples_per_second": 5.609, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 1e-05, |
|
"step": 22659 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.3115038115038115, |
|
"eval_f1_macro": 0.72263281374496, |
|
"eval_f1_micro": 0.8212135055442501, |
|
"eval_loss": 0.12091034650802612, |
|
"eval_roc_auc": 0.8817381602117871, |
|
"eval_runtime": 514.2801, |
|
"eval_samples_per_second": 5.612, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 1e-05, |
|
"step": 22932 |
|
}, |
|
{ |
|
"epoch": 84.24908424908425, |
|
"grad_norm": 0.4926730692386627, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1174, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.30942480942480943, |
|
"eval_f1_macro": 0.7219026145386024, |
|
"eval_f1_micro": 0.8212908842183808, |
|
"eval_loss": 0.12058679759502411, |
|
"eval_roc_auc": 0.8823288887291161, |
|
"eval_runtime": 513.2258, |
|
"eval_samples_per_second": 5.623, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 1e-05, |
|
"step": 23205 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.30838530838530837, |
|
"eval_f1_macro": 0.7255503995321377, |
|
"eval_f1_micro": 0.8206727371003285, |
|
"eval_loss": 0.1210218220949173, |
|
"eval_roc_auc": 0.8810894976708349, |
|
"eval_runtime": 516.6336, |
|
"eval_samples_per_second": 5.586, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1e-05, |
|
"step": 23478 |
|
}, |
|
{ |
|
"epoch": 86.08058608058609, |
|
"grad_norm": 0.3941400647163391, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1167, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.30734580734580735, |
|
"eval_f1_macro": 0.7163464112504625, |
|
"eval_f1_micro": 0.81919187715867, |
|
"eval_loss": 0.12097787857055664, |
|
"eval_roc_auc": 0.8800195357981024, |
|
"eval_runtime": 514.309, |
|
"eval_samples_per_second": 5.611, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 1e-05, |
|
"step": 23751 |
|
}, |
|
{ |
|
"epoch": 87.91208791208791, |
|
"grad_norm": 0.30446189641952515, |
|
"learning_rate": 1e-05, |
|
"loss": 0.116, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.30942480942480943, |
|
"eval_f1_macro": 0.7179611359738045, |
|
"eval_f1_micro": 0.8219223445649475, |
|
"eval_loss": 0.12078534066677094, |
|
"eval_roc_auc": 0.8831166385335435, |
|
"eval_runtime": 513.9044, |
|
"eval_samples_per_second": 5.616, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 1e-05, |
|
"step": 24024 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.3125433125433125, |
|
"eval_f1_macro": 0.7293063087262872, |
|
"eval_f1_micro": 0.8235824319895118, |
|
"eval_loss": 0.1213160827755928, |
|
"eval_roc_auc": 0.8871674997505042, |
|
"eval_runtime": 514.1086, |
|
"eval_samples_per_second": 5.614, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 24297 |
|
}, |
|
{ |
|
"epoch": 89.74358974358974, |
|
"grad_norm": 0.2996889054775238, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1161, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.3108108108108108, |
|
"eval_f1_macro": 0.7249894355418997, |
|
"eval_f1_micro": 0.8228019165403988, |
|
"eval_loss": 0.12110408395528793, |
|
"eval_roc_auc": 0.8868651536304606, |
|
"eval_runtime": 511.9928, |
|
"eval_samples_per_second": 5.637, |
|
"eval_steps_per_second": 0.178, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 24570 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.31046431046431044, |
|
"eval_f1_macro": 0.7187027508297176, |
|
"eval_f1_micro": 0.8191074795725959, |
|
"eval_loss": 0.1205781027674675, |
|
"eval_roc_auc": 0.8779146622039986, |
|
"eval_runtime": 513.1929, |
|
"eval_samples_per_second": 5.624, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 24843 |
|
}, |
|
{ |
|
"epoch": 91.57509157509158, |
|
"grad_norm": 0.30445897579193115, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1162, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.31046431046431044, |
|
"eval_f1_macro": 0.7150284118631205, |
|
"eval_f1_micro": 0.8196009683612989, |
|
"eval_loss": 0.12076584249734879, |
|
"eval_roc_auc": 0.8793405313350767, |
|
"eval_runtime": 520.7671, |
|
"eval_samples_per_second": 5.542, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 25116 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 25116, |
|
"total_flos": 1.1890234809282512e+21, |
|
"train_loss": 0.1360613288991788, |
|
"train_runtime": 194834.2342, |
|
"train_samples_per_second": 6.71, |
|
"train_steps_per_second": 0.21 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1890234809282512e+21, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|