lombardata's picture
Evaluation on the test set completed on 2024_09_08.
8e9825c verified
{
"best_metric": 0.12050338089466095,
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-large-2024_09_05-batch-size32_epochs150_freeze/checkpoint-22386",
"epoch": 92.0,
"eval_steps": 500,
"global_step": 25116,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.22314622314622315,
"eval_f1_macro": 0.5430112866470752,
"eval_f1_micro": 0.7516596896274684,
"eval_loss": 0.16899551451206207,
"eval_roc_auc": 0.8384250127967259,
"eval_runtime": 514.8139,
"eval_samples_per_second": 5.606,
"eval_steps_per_second": 0.177,
"learning_rate": 0.001,
"step": 273
},
{
"epoch": 1.8315018315018317,
"grad_norm": 0.9695320725440979,
"learning_rate": 0.001,
"loss": 0.2719,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.24012474012474014,
"eval_f1_macro": 0.5721428312627432,
"eval_f1_micro": 0.765669700910273,
"eval_loss": 0.153842031955719,
"eval_roc_auc": 0.8396070197954885,
"eval_runtime": 520.9151,
"eval_samples_per_second": 5.54,
"eval_steps_per_second": 0.175,
"learning_rate": 0.001,
"step": 546
},
{
"epoch": 3.0,
"eval_accuracy": 0.23458073458073458,
"eval_f1_macro": 0.6137585525531024,
"eval_f1_micro": 0.7772688719253604,
"eval_loss": 0.14828726649284363,
"eval_roc_auc": 0.851613165443153,
"eval_runtime": 519.0164,
"eval_samples_per_second": 5.561,
"eval_steps_per_second": 0.175,
"learning_rate": 0.001,
"step": 819
},
{
"epoch": 3.663003663003663,
"grad_norm": 0.24952150881290436,
"learning_rate": 0.001,
"loss": 0.1694,
"step": 1000
},
{
"epoch": 4.0,
"eval_accuracy": 0.2494802494802495,
"eval_f1_macro": 0.6224730910908008,
"eval_f1_micro": 0.7722737615963591,
"eval_loss": 0.1479637324810028,
"eval_roc_auc": 0.8406560025496872,
"eval_runtime": 512.3298,
"eval_samples_per_second": 5.633,
"eval_steps_per_second": 0.178,
"learning_rate": 0.001,
"step": 1092
},
{
"epoch": 5.0,
"eval_accuracy": 0.2494802494802495,
"eval_f1_macro": 0.6302307709949958,
"eval_f1_micro": 0.779738930569409,
"eval_loss": 0.14575305581092834,
"eval_roc_auc": 0.8469713476749664,
"eval_runtime": 509.5723,
"eval_samples_per_second": 5.664,
"eval_steps_per_second": 0.179,
"learning_rate": 0.001,
"step": 1365
},
{
"epoch": 5.4945054945054945,
"grad_norm": 0.17697261273860931,
"learning_rate": 0.001,
"loss": 0.1625,
"step": 1500
},
{
"epoch": 6.0,
"eval_accuracy": 0.2480942480942481,
"eval_f1_macro": 0.6092591780781843,
"eval_f1_micro": 0.7798061948433986,
"eval_loss": 0.14499613642692566,
"eval_roc_auc": 0.8476621294180898,
"eval_runtime": 511.5766,
"eval_samples_per_second": 5.641,
"eval_steps_per_second": 0.178,
"learning_rate": 0.001,
"step": 1638
},
{
"epoch": 7.0,
"eval_accuracy": 0.2525987525987526,
"eval_f1_macro": 0.624806622732382,
"eval_f1_micro": 0.7767369242779079,
"eval_loss": 0.1474585235118866,
"eval_roc_auc": 0.8453623673245133,
"eval_runtime": 510.6821,
"eval_samples_per_second": 5.651,
"eval_steps_per_second": 0.178,
"learning_rate": 0.001,
"step": 1911
},
{
"epoch": 7.326007326007326,
"grad_norm": 0.24790136516094208,
"learning_rate": 0.001,
"loss": 0.1592,
"step": 2000
},
{
"epoch": 8.0,
"eval_accuracy": 0.25744975744975745,
"eval_f1_macro": 0.6249401475720361,
"eval_f1_micro": 0.7803859753759638,
"eval_loss": 0.14568069577217102,
"eval_roc_auc": 0.8520784887308331,
"eval_runtime": 514.1071,
"eval_samples_per_second": 5.614,
"eval_steps_per_second": 0.177,
"learning_rate": 0.001,
"step": 2184
},
{
"epoch": 9.0,
"eval_accuracy": 0.25744975744975745,
"eval_f1_macro": 0.652642904607388,
"eval_f1_micro": 0.7868685150535805,
"eval_loss": 0.14169421792030334,
"eval_roc_auc": 0.8560913219420118,
"eval_runtime": 518.322,
"eval_samples_per_second": 5.568,
"eval_steps_per_second": 0.176,
"learning_rate": 0.001,
"step": 2457
},
{
"epoch": 9.157509157509157,
"grad_norm": 0.2022881656885147,
"learning_rate": 0.001,
"loss": 0.157,
"step": 2500
},
{
"epoch": 10.0,
"eval_accuracy": 0.25467775467775466,
"eval_f1_macro": 0.6289931868767601,
"eval_f1_micro": 0.7757335098168984,
"eval_loss": 0.1436299830675125,
"eval_roc_auc": 0.8403493908543921,
"eval_runtime": 514.5559,
"eval_samples_per_second": 5.609,
"eval_steps_per_second": 0.177,
"learning_rate": 0.001,
"step": 2730
},
{
"epoch": 10.989010989010989,
"grad_norm": 0.16650938987731934,
"learning_rate": 0.001,
"loss": 0.1563,
"step": 3000
},
{
"epoch": 11.0,
"eval_accuracy": 0.26403326403326405,
"eval_f1_macro": 0.6447870111639475,
"eval_f1_micro": 0.7886988341417751,
"eval_loss": 0.1428152322769165,
"eval_roc_auc": 0.8569209092596786,
"eval_runtime": 515.3716,
"eval_samples_per_second": 5.6,
"eval_steps_per_second": 0.177,
"learning_rate": 0.001,
"step": 3003
},
{
"epoch": 12.0,
"eval_accuracy": 0.25814275814275817,
"eval_f1_macro": 0.6493205009564239,
"eval_f1_micro": 0.7904845227679873,
"eval_loss": 0.1438700556755066,
"eval_roc_auc": 0.8637698939454586,
"eval_runtime": 512.0567,
"eval_samples_per_second": 5.636,
"eval_steps_per_second": 0.178,
"learning_rate": 0.001,
"step": 3276
},
{
"epoch": 12.820512820512821,
"grad_norm": 0.163461372256279,
"learning_rate": 0.001,
"loss": 0.1558,
"step": 3500
},
{
"epoch": 13.0,
"eval_accuracy": 0.2713097713097713,
"eval_f1_macro": 0.6561811626743236,
"eval_f1_micro": 0.7906956746065871,
"eval_loss": 0.13913600146770477,
"eval_roc_auc": 0.8551388511813229,
"eval_runtime": 511.5092,
"eval_samples_per_second": 5.642,
"eval_steps_per_second": 0.178,
"learning_rate": 0.001,
"step": 3549
},
{
"epoch": 14.0,
"eval_accuracy": 0.2643797643797644,
"eval_f1_macro": 0.6337626365639194,
"eval_f1_micro": 0.783810807286006,
"eval_loss": 0.14094506204128265,
"eval_roc_auc": 0.8484895839481307,
"eval_runtime": 513.4311,
"eval_samples_per_second": 5.621,
"eval_steps_per_second": 0.177,
"learning_rate": 0.001,
"step": 3822
},
{
"epoch": 14.652014652014651,
"grad_norm": 0.17725127935409546,
"learning_rate": 0.001,
"loss": 0.1543,
"step": 4000
},
{
"epoch": 15.0,
"eval_accuracy": 0.2577962577962578,
"eval_f1_macro": 0.6463067634895379,
"eval_f1_micro": 0.7907172995780591,
"eval_loss": 0.1396123319864273,
"eval_roc_auc": 0.8603407738558333,
"eval_runtime": 520.2063,
"eval_samples_per_second": 5.548,
"eval_steps_per_second": 0.175,
"learning_rate": 0.001,
"step": 4095
},
{
"epoch": 16.0,
"eval_accuracy": 0.2654192654192654,
"eval_f1_macro": 0.6593840515969085,
"eval_f1_micro": 0.7913274487959551,
"eval_loss": 0.13904806971549988,
"eval_roc_auc": 0.856418510343081,
"eval_runtime": 522.3782,
"eval_samples_per_second": 5.525,
"eval_steps_per_second": 0.174,
"learning_rate": 0.001,
"step": 4368
},
{
"epoch": 16.483516483516482,
"grad_norm": 0.16505596041679382,
"learning_rate": 0.001,
"loss": 0.1535,
"step": 4500
},
{
"epoch": 17.0,
"eval_accuracy": 0.2564102564102564,
"eval_f1_macro": 0.6585824628325464,
"eval_f1_micro": 0.7939832128313804,
"eval_loss": 0.1418265849351883,
"eval_roc_auc": 0.8664525383660324,
"eval_runtime": 520.8828,
"eval_samples_per_second": 5.541,
"eval_steps_per_second": 0.175,
"learning_rate": 0.001,
"step": 4641
},
{
"epoch": 18.0,
"eval_accuracy": 0.26576576576576577,
"eval_f1_macro": 0.6560187518750095,
"eval_f1_micro": 0.7957187827911858,
"eval_loss": 0.14155420660972595,
"eval_roc_auc": 0.864558649781785,
"eval_runtime": 521.9656,
"eval_samples_per_second": 5.529,
"eval_steps_per_second": 0.174,
"learning_rate": 0.001,
"step": 4914
},
{
"epoch": 18.315018315018314,
"grad_norm": 0.176731139421463,
"learning_rate": 0.001,
"loss": 0.1549,
"step": 5000
},
{
"epoch": 19.0,
"eval_accuracy": 0.262993762993763,
"eval_f1_macro": 0.6524018082903621,
"eval_f1_micro": 0.7885625699767461,
"eval_loss": 0.14027266204357147,
"eval_roc_auc": 0.8535729424099051,
"eval_runtime": 525.594,
"eval_samples_per_second": 5.491,
"eval_steps_per_second": 0.173,
"learning_rate": 0.001,
"step": 5187
},
{
"epoch": 20.0,
"eval_accuracy": 0.26126126126126126,
"eval_f1_macro": 0.6558190248610255,
"eval_f1_micro": 0.7910696719558615,
"eval_loss": 0.14759798347949982,
"eval_roc_auc": 0.8567849608157283,
"eval_runtime": 533.1376,
"eval_samples_per_second": 5.413,
"eval_steps_per_second": 0.171,
"learning_rate": 0.001,
"step": 5460
},
{
"epoch": 20.146520146520146,
"grad_norm": 0.15767891705036163,
"learning_rate": 0.001,
"loss": 0.154,
"step": 5500
},
{
"epoch": 21.0,
"eval_accuracy": 0.26576576576576577,
"eval_f1_macro": 0.6397027546064713,
"eval_f1_micro": 0.7879767016708474,
"eval_loss": 0.14285211265087128,
"eval_roc_auc": 0.8567511447301636,
"eval_runtime": 527.0011,
"eval_samples_per_second": 5.476,
"eval_steps_per_second": 0.173,
"learning_rate": 0.001,
"step": 5733
},
{
"epoch": 21.978021978021978,
"grad_norm": 0.18300685286521912,
"learning_rate": 0.001,
"loss": 0.1529,
"step": 6000
},
{
"epoch": 22.0,
"eval_accuracy": 0.26126126126126126,
"eval_f1_macro": 0.650810186340724,
"eval_f1_micro": 0.7936799099512236,
"eval_loss": 0.141402930021286,
"eval_roc_auc": 0.8653510005054305,
"eval_runtime": 525.9127,
"eval_samples_per_second": 5.488,
"eval_steps_per_second": 0.173,
"learning_rate": 0.001,
"step": 6006
},
{
"epoch": 23.0,
"eval_accuracy": 0.26853776853776856,
"eval_f1_macro": 0.6618136826297922,
"eval_f1_micro": 0.7975794766896787,
"eval_loss": 0.1415141373872757,
"eval_roc_auc": 0.8613092204030781,
"eval_runtime": 530.5247,
"eval_samples_per_second": 5.44,
"eval_steps_per_second": 0.172,
"learning_rate": 0.0001,
"step": 6279
},
{
"epoch": 23.80952380952381,
"grad_norm": 0.16848017275333405,
"learning_rate": 0.0001,
"loss": 0.1449,
"step": 6500
},
{
"epoch": 24.0,
"eval_accuracy": 0.27893277893277896,
"eval_f1_macro": 0.6750686264509598,
"eval_f1_micro": 0.8044778018063861,
"eval_loss": 0.13230843842029572,
"eval_roc_auc": 0.8664561198395929,
"eval_runtime": 521.5756,
"eval_samples_per_second": 5.533,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 6552
},
{
"epoch": 25.0,
"eval_accuracy": 0.27927927927927926,
"eval_f1_macro": 0.6724022117445357,
"eval_f1_micro": 0.8044072500946213,
"eval_loss": 0.13101588189601898,
"eval_roc_auc": 0.868781233937024,
"eval_runtime": 523.3306,
"eval_samples_per_second": 5.515,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 6825
},
{
"epoch": 25.641025641025642,
"grad_norm": 0.16336454451084137,
"learning_rate": 0.0001,
"loss": 0.1416,
"step": 7000
},
{
"epoch": 26.0,
"eval_accuracy": 0.28205128205128205,
"eval_f1_macro": 0.6689442300740391,
"eval_f1_micro": 0.8035965398218775,
"eval_loss": 0.13268393278121948,
"eval_roc_auc": 0.8645798435204571,
"eval_runtime": 532.8406,
"eval_samples_per_second": 5.416,
"eval_steps_per_second": 0.171,
"learning_rate": 0.0001,
"step": 7098
},
{
"epoch": 27.0,
"eval_accuracy": 0.2817047817047817,
"eval_f1_macro": 0.679681812643572,
"eval_f1_micro": 0.8068647969861867,
"eval_loss": 0.1317097693681717,
"eval_roc_auc": 0.8714747032608311,
"eval_runtime": 527.4278,
"eval_samples_per_second": 5.472,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 7371
},
{
"epoch": 27.47252747252747,
"grad_norm": 0.1572931855916977,
"learning_rate": 0.0001,
"loss": 0.1391,
"step": 7500
},
{
"epoch": 28.0,
"eval_accuracy": 0.27754677754677753,
"eval_f1_macro": 0.6818462300001074,
"eval_f1_micro": 0.8072126727334008,
"eval_loss": 0.12880520522594452,
"eval_roc_auc": 0.8697994857701482,
"eval_runtime": 536.9046,
"eval_samples_per_second": 5.375,
"eval_steps_per_second": 0.169,
"learning_rate": 0.0001,
"step": 7644
},
{
"epoch": 29.0,
"eval_accuracy": 0.2844767844767845,
"eval_f1_macro": 0.6807929806344717,
"eval_f1_micro": 0.8038088702067427,
"eval_loss": 0.12942521274089813,
"eval_roc_auc": 0.8628519636133017,
"eval_runtime": 520.5065,
"eval_samples_per_second": 5.545,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 7917
},
{
"epoch": 29.304029304029303,
"grad_norm": 0.19199338555335999,
"learning_rate": 0.0001,
"loss": 0.138,
"step": 8000
},
{
"epoch": 30.0,
"eval_accuracy": 0.28586278586278585,
"eval_f1_macro": 0.6825529208005033,
"eval_f1_micro": 0.8077149835761811,
"eval_loss": 0.12943296134471893,
"eval_roc_auc": 0.8701959964759374,
"eval_runtime": 543.5755,
"eval_samples_per_second": 5.309,
"eval_steps_per_second": 0.167,
"learning_rate": 0.0001,
"step": 8190
},
{
"epoch": 31.0,
"eval_accuracy": 0.28794178794178793,
"eval_f1_macro": 0.6779122940127521,
"eval_f1_micro": 0.8073808915025994,
"eval_loss": 0.12738928198814392,
"eval_roc_auc": 0.8666172459085354,
"eval_runtime": 521.4164,
"eval_samples_per_second": 5.535,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 8463
},
{
"epoch": 31.135531135531135,
"grad_norm": 0.1997932642698288,
"learning_rate": 0.0001,
"loss": 0.1364,
"step": 8500
},
{
"epoch": 32.0,
"eval_accuracy": 0.2882882882882883,
"eval_f1_macro": 0.6868638344898197,
"eval_f1_micro": 0.8104185890445432,
"eval_loss": 0.12775012850761414,
"eval_roc_auc": 0.8728485806633693,
"eval_runtime": 519.8308,
"eval_samples_per_second": 5.552,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 8736
},
{
"epoch": 32.967032967032964,
"grad_norm": 0.19476589560508728,
"learning_rate": 0.0001,
"loss": 0.1359,
"step": 9000
},
{
"epoch": 33.0,
"eval_accuracy": 0.2869022869022869,
"eval_f1_macro": 0.6810807224403135,
"eval_f1_micro": 0.8077248140635565,
"eval_loss": 0.12765593826770782,
"eval_roc_auc": 0.8692062891212271,
"eval_runtime": 514.7142,
"eval_samples_per_second": 5.607,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 9009
},
{
"epoch": 34.0,
"eval_accuracy": 0.2882882882882883,
"eval_f1_macro": 0.687361527737602,
"eval_f1_micro": 0.8108837797932926,
"eval_loss": 0.12660712003707886,
"eval_roc_auc": 0.8714320206807965,
"eval_runtime": 514.9645,
"eval_samples_per_second": 5.604,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 9282
},
{
"epoch": 34.798534798534796,
"grad_norm": 0.2034957855939865,
"learning_rate": 0.0001,
"loss": 0.1341,
"step": 9500
},
{
"epoch": 35.0,
"eval_accuracy": 0.29036729036729036,
"eval_f1_macro": 0.688483181989703,
"eval_f1_micro": 0.8103963941193815,
"eval_loss": 0.1262102574110031,
"eval_roc_auc": 0.8715800817488106,
"eval_runtime": 525.0872,
"eval_samples_per_second": 5.496,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 9555
},
{
"epoch": 36.0,
"eval_accuracy": 0.28274428274428276,
"eval_f1_macro": 0.6876394944988364,
"eval_f1_micro": 0.8070400273399119,
"eval_loss": 0.12687553465366364,
"eval_roc_auc": 0.8657418371913091,
"eval_runtime": 513.0757,
"eval_samples_per_second": 5.625,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 9828
},
{
"epoch": 36.63003663003663,
"grad_norm": 0.20557202398777008,
"learning_rate": 0.0001,
"loss": 0.1339,
"step": 10000
},
{
"epoch": 37.0,
"eval_accuracy": 0.28655578655578656,
"eval_f1_macro": 0.6833930255395054,
"eval_f1_micro": 0.8081597960050999,
"eval_loss": 0.12656189501285553,
"eval_roc_auc": 0.8678163688633396,
"eval_runtime": 515.3436,
"eval_samples_per_second": 5.6,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 10101
},
{
"epoch": 38.0,
"eval_accuracy": 0.2955647955647956,
"eval_f1_macro": 0.6936175483283518,
"eval_f1_micro": 0.8106371284826448,
"eval_loss": 0.12547720968723297,
"eval_roc_auc": 0.8706625538294134,
"eval_runtime": 512.1358,
"eval_samples_per_second": 5.635,
"eval_steps_per_second": 0.178,
"learning_rate": 0.0001,
"step": 10374
},
{
"epoch": 38.46153846153846,
"grad_norm": 0.2112371176481247,
"learning_rate": 0.0001,
"loss": 0.1307,
"step": 10500
},
{
"epoch": 39.0,
"eval_accuracy": 0.2927927927927928,
"eval_f1_macro": 0.6985657340894045,
"eval_f1_micro": 0.8141880626875626,
"eval_loss": 0.12485096603631973,
"eval_roc_auc": 0.8767653445350737,
"eval_runtime": 512.6109,
"eval_samples_per_second": 5.63,
"eval_steps_per_second": 0.178,
"learning_rate": 0.0001,
"step": 10647
},
{
"epoch": 40.0,
"eval_accuracy": 0.2934857934857935,
"eval_f1_macro": 0.6989554260935754,
"eval_f1_micro": 0.8138017044273539,
"eval_loss": 0.1257668137550354,
"eval_roc_auc": 0.8773247787534647,
"eval_runtime": 513.8833,
"eval_samples_per_second": 5.616,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 10920
},
{
"epoch": 40.29304029304029,
"grad_norm": 0.23032954335212708,
"learning_rate": 0.0001,
"loss": 0.1317,
"step": 11000
},
{
"epoch": 41.0,
"eval_accuracy": 0.29244629244629244,
"eval_f1_macro": 0.6923923602014324,
"eval_f1_micro": 0.8101351925856646,
"eval_loss": 0.12528541684150696,
"eval_roc_auc": 0.8687915491174283,
"eval_runtime": 513.0005,
"eval_samples_per_second": 5.626,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 11193
},
{
"epoch": 42.0,
"eval_accuracy": 0.3004158004158004,
"eval_f1_macro": 0.6970236383039276,
"eval_f1_micro": 0.8138018093835474,
"eval_loss": 0.12443084269762039,
"eval_roc_auc": 0.8737649281720051,
"eval_runtime": 525.5315,
"eval_samples_per_second": 5.492,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 11466
},
{
"epoch": 42.124542124542124,
"grad_norm": 0.23487386107444763,
"learning_rate": 0.0001,
"loss": 0.1308,
"step": 11500
},
{
"epoch": 43.0,
"eval_accuracy": 0.2948717948717949,
"eval_f1_macro": 0.6956334056896907,
"eval_f1_micro": 0.8131470414948238,
"eval_loss": 0.12451612949371338,
"eval_roc_auc": 0.8733690344991142,
"eval_runtime": 514.3778,
"eval_samples_per_second": 5.611,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 11739
},
{
"epoch": 43.956043956043956,
"grad_norm": 0.25621357560157776,
"learning_rate": 0.0001,
"loss": 0.1307,
"step": 12000
},
{
"epoch": 44.0,
"eval_accuracy": 0.2966042966042966,
"eval_f1_macro": 0.6915470420512126,
"eval_f1_micro": 0.812950847173293,
"eval_loss": 0.12501148879528046,
"eval_roc_auc": 0.8742664283667729,
"eval_runtime": 519.3764,
"eval_samples_per_second": 5.557,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 12012
},
{
"epoch": 45.0,
"eval_accuracy": 0.29625779625779625,
"eval_f1_macro": 0.7050548840380568,
"eval_f1_micro": 0.8136846971798428,
"eval_loss": 0.12397606670856476,
"eval_roc_auc": 0.8740443367647517,
"eval_runtime": 515.8997,
"eval_samples_per_second": 5.594,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 12285
},
{
"epoch": 45.78754578754579,
"grad_norm": 0.22914335131645203,
"learning_rate": 0.0001,
"loss": 0.1295,
"step": 12500
},
{
"epoch": 46.0,
"eval_accuracy": 0.29764379764379767,
"eval_f1_macro": 0.6987723620069867,
"eval_f1_micro": 0.8130628734954971,
"eval_loss": 0.12409698963165283,
"eval_roc_auc": 0.8733228777555885,
"eval_runtime": 516.6269,
"eval_samples_per_second": 5.586,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 12558
},
{
"epoch": 47.0,
"eval_accuracy": 0.2955647955647956,
"eval_f1_macro": 0.6957628076563835,
"eval_f1_micro": 0.811911298838437,
"eval_loss": 0.12429661303758621,
"eval_roc_auc": 0.8716271908692008,
"eval_runtime": 518.0917,
"eval_samples_per_second": 5.57,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 12831
},
{
"epoch": 47.61904761904762,
"grad_norm": 0.25639113783836365,
"learning_rate": 0.0001,
"loss": 0.1293,
"step": 13000
},
{
"epoch": 48.0,
"eval_accuracy": 0.2955647955647956,
"eval_f1_macro": 0.6990296569974817,
"eval_f1_micro": 0.8135280295401142,
"eval_loss": 0.12393072247505188,
"eval_roc_auc": 0.874436809929186,
"eval_runtime": 517.4634,
"eval_samples_per_second": 5.577,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 13104
},
{
"epoch": 49.0,
"eval_accuracy": 0.29972279972279975,
"eval_f1_macro": 0.7007060102949784,
"eval_f1_micro": 0.8152993625265614,
"eval_loss": 0.1242954283952713,
"eval_roc_auc": 0.8774914581184896,
"eval_runtime": 511.3749,
"eval_samples_per_second": 5.644,
"eval_steps_per_second": 0.178,
"learning_rate": 0.0001,
"step": 13377
},
{
"epoch": 49.45054945054945,
"grad_norm": 0.27197974920272827,
"learning_rate": 0.0001,
"loss": 0.1274,
"step": 13500
},
{
"epoch": 50.0,
"eval_accuracy": 0.29799029799029797,
"eval_f1_macro": 0.6999734070385492,
"eval_f1_micro": 0.8151919866444074,
"eval_loss": 0.12405084818601608,
"eval_roc_auc": 0.8769273693258459,
"eval_runtime": 509.3276,
"eval_samples_per_second": 5.666,
"eval_steps_per_second": 0.179,
"learning_rate": 0.0001,
"step": 13650
},
{
"epoch": 51.0,
"eval_accuracy": 0.3011088011088011,
"eval_f1_macro": 0.7055935576453343,
"eval_f1_micro": 0.8153039745759215,
"eval_loss": 0.12483017891645432,
"eval_roc_auc": 0.8803007418345086,
"eval_runtime": 511.0056,
"eval_samples_per_second": 5.648,
"eval_steps_per_second": 0.178,
"learning_rate": 0.0001,
"step": 13923
},
{
"epoch": 51.282051282051285,
"grad_norm": 0.23091430962085724,
"learning_rate": 0.0001,
"loss": 0.1271,
"step": 14000
},
{
"epoch": 52.0,
"eval_accuracy": 0.3049203049203049,
"eval_f1_macro": 0.7035566403965832,
"eval_f1_micro": 0.8157241959217996,
"eval_loss": 0.12426182627677917,
"eval_roc_auc": 0.8750656737623661,
"eval_runtime": 511.1647,
"eval_samples_per_second": 5.646,
"eval_steps_per_second": 0.178,
"learning_rate": 0.0001,
"step": 14196
},
{
"epoch": 53.0,
"eval_accuracy": 0.30214830214830213,
"eval_f1_macro": 0.7031528349086803,
"eval_f1_micro": 0.8152648882600192,
"eval_loss": 0.12408608943223953,
"eval_roc_auc": 0.8778170234547618,
"eval_runtime": 520.4,
"eval_samples_per_second": 5.546,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 14469
},
{
"epoch": 53.11355311355312,
"grad_norm": 0.23177389800548553,
"learning_rate": 0.0001,
"loss": 0.1275,
"step": 14500
},
{
"epoch": 54.0,
"eval_accuracy": 0.30214830214830213,
"eval_f1_macro": 0.7067666695453366,
"eval_f1_micro": 0.8152251458307105,
"eval_loss": 0.12344320118427277,
"eval_roc_auc": 0.8753333050750151,
"eval_runtime": 522.8329,
"eval_samples_per_second": 5.52,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 14742
},
{
"epoch": 54.94505494505494,
"grad_norm": 0.3403611481189728,
"learning_rate": 0.0001,
"loss": 0.1256,
"step": 15000
},
{
"epoch": 55.0,
"eval_accuracy": 0.30180180180180183,
"eval_f1_macro": 0.7075536762185066,
"eval_f1_micro": 0.8166332665330662,
"eval_loss": 0.12307523190975189,
"eval_roc_auc": 0.8776256091187804,
"eval_runtime": 513.5394,
"eval_samples_per_second": 5.62,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 15015
},
{
"epoch": 56.0,
"eval_accuracy": 0.30665280665280664,
"eval_f1_macro": 0.7087921855865761,
"eval_f1_micro": 0.8189626693095475,
"eval_loss": 0.12282071262598038,
"eval_roc_auc": 0.8821854285803199,
"eval_runtime": 519.2592,
"eval_samples_per_second": 5.558,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 15288
},
{
"epoch": 56.776556776556774,
"grad_norm": 0.28649473190307617,
"learning_rate": 0.0001,
"loss": 0.1258,
"step": 15500
},
{
"epoch": 57.0,
"eval_accuracy": 0.306999306999307,
"eval_f1_macro": 0.7079839879234633,
"eval_f1_micro": 0.8160328019748128,
"eval_loss": 0.12259934842586517,
"eval_roc_auc": 0.8766650096203477,
"eval_runtime": 523.8952,
"eval_samples_per_second": 5.509,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 15561
},
{
"epoch": 58.0,
"eval_accuracy": 0.30214830214830213,
"eval_f1_macro": 0.7072503847729165,
"eval_f1_micro": 0.8170145133631687,
"eval_loss": 0.12334763258695602,
"eval_roc_auc": 0.8773053153896588,
"eval_runtime": 522.7463,
"eval_samples_per_second": 5.521,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 15834
},
{
"epoch": 58.608058608058606,
"grad_norm": 0.2677023112773895,
"learning_rate": 0.0001,
"loss": 0.1258,
"step": 16000
},
{
"epoch": 59.0,
"eval_accuracy": 0.30214830214830213,
"eval_f1_macro": 0.713532815646164,
"eval_f1_micro": 0.8172105834237543,
"eval_loss": 0.12272054702043533,
"eval_roc_auc": 0.8780682765680952,
"eval_runtime": 524.8476,
"eval_samples_per_second": 5.499,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 16107
},
{
"epoch": 60.0,
"eval_accuracy": 0.30214830214830213,
"eval_f1_macro": 0.7039801220819605,
"eval_f1_micro": 0.8142579609764339,
"eval_loss": 0.12334387749433517,
"eval_roc_auc": 0.8729462194126062,
"eval_runtime": 526.97,
"eval_samples_per_second": 5.477,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 16380
},
{
"epoch": 60.43956043956044,
"grad_norm": 0.273879736661911,
"learning_rate": 0.0001,
"loss": 0.1252,
"step": 16500
},
{
"epoch": 61.0,
"eval_accuracy": 0.3042273042273042,
"eval_f1_macro": 0.7120578542808926,
"eval_f1_micro": 0.816814564846061,
"eval_loss": 0.12339764833450317,
"eval_roc_auc": 0.8783554248995846,
"eval_runtime": 524.4656,
"eval_samples_per_second": 5.503,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 16653
},
{
"epoch": 62.0,
"eval_accuracy": 0.3049203049203049,
"eval_f1_macro": 0.7124854785684515,
"eval_f1_micro": 0.8169309505831026,
"eval_loss": 0.12234435975551605,
"eval_roc_auc": 0.876382515863111,
"eval_runtime": 518.389,
"eval_samples_per_second": 5.567,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 16926
},
{
"epoch": 62.27106227106227,
"grad_norm": 0.2836596667766571,
"learning_rate": 0.0001,
"loss": 0.1238,
"step": 17000
},
{
"epoch": 63.0,
"eval_accuracy": 0.30353430353430355,
"eval_f1_macro": 0.709030237195192,
"eval_f1_micro": 0.8151443922095366,
"eval_loss": 0.12311259657144547,
"eval_roc_auc": 0.875227363209172,
"eval_runtime": 523.1406,
"eval_samples_per_second": 5.517,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 17199
},
{
"epoch": 64.0,
"eval_accuracy": 0.30665280665280664,
"eval_f1_macro": 0.7114197657112039,
"eval_f1_micro": 0.8183222681531587,
"eval_loss": 0.12282687425613403,
"eval_roc_auc": 0.8785221042646094,
"eval_runtime": 525.9879,
"eval_samples_per_second": 5.487,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 17472
},
{
"epoch": 64.1025641025641,
"grad_norm": 0.327009916305542,
"learning_rate": 0.0001,
"loss": 0.1247,
"step": 17500
},
{
"epoch": 65.0,
"eval_accuracy": 0.30353430353430355,
"eval_f1_macro": 0.715610525327271,
"eval_f1_micro": 0.8185065204751224,
"eval_loss": 0.12305620312690735,
"eval_roc_auc": 0.8802214933483853,
"eval_runtime": 527.6963,
"eval_samples_per_second": 5.469,
"eval_steps_per_second": 0.172,
"learning_rate": 0.0001,
"step": 17745
},
{
"epoch": 65.93406593406593,
"grad_norm": 0.3439556360244751,
"learning_rate": 0.0001,
"loss": 0.123,
"step": 18000
},
{
"epoch": 66.0,
"eval_accuracy": 0.30214830214830213,
"eval_f1_macro": 0.7083957677770276,
"eval_f1_micro": 0.8193021036471515,
"eval_loss": 0.12252139300107956,
"eval_roc_auc": 0.8809488409975973,
"eval_runtime": 523.6027,
"eval_samples_per_second": 5.512,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 18018
},
{
"epoch": 67.0,
"eval_accuracy": 0.3031878031878032,
"eval_f1_macro": 0.713563304331985,
"eval_f1_micro": 0.8185542268382505,
"eval_loss": 0.12215397506952286,
"eval_roc_auc": 0.8813502879665707,
"eval_runtime": 528.5406,
"eval_samples_per_second": 5.46,
"eval_steps_per_second": 0.172,
"learning_rate": 0.0001,
"step": 18291
},
{
"epoch": 67.76556776556777,
"grad_norm": 0.3434881269931793,
"learning_rate": 0.0001,
"loss": 0.1224,
"step": 18500
},
{
"epoch": 68.0,
"eval_accuracy": 0.3090783090783091,
"eval_f1_macro": 0.7169216330412181,
"eval_f1_micro": 0.8201218248870841,
"eval_loss": 0.12200037389993668,
"eval_roc_auc": 0.8818022645643908,
"eval_runtime": 525.6971,
"eval_samples_per_second": 5.49,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 18564
},
{
"epoch": 69.0,
"eval_accuracy": 0.30180180180180183,
"eval_f1_macro": 0.7165157275423649,
"eval_f1_micro": 0.8171493231633209,
"eval_loss": 0.12282921373844147,
"eval_roc_auc": 0.8767867663076429,
"eval_runtime": 539.1574,
"eval_samples_per_second": 5.353,
"eval_steps_per_second": 0.169,
"learning_rate": 0.0001,
"step": 18837
},
{
"epoch": 69.59706959706959,
"grad_norm": 0.2773456275463104,
"learning_rate": 0.0001,
"loss": 0.1228,
"step": 19000
},
{
"epoch": 70.0,
"eval_accuracy": 0.3042273042273042,
"eval_f1_macro": 0.7130922408537738,
"eval_f1_micro": 0.8176893032631977,
"eval_loss": 0.12265007942914963,
"eval_roc_auc": 0.8764658555456234,
"eval_runtime": 532.0042,
"eval_samples_per_second": 5.425,
"eval_steps_per_second": 0.171,
"learning_rate": 0.0001,
"step": 19110
},
{
"epoch": 71.0,
"eval_accuracy": 0.29799029799029797,
"eval_f1_macro": 0.7123118599173115,
"eval_f1_micro": 0.8155257705805251,
"eval_loss": 0.12318737804889679,
"eval_roc_auc": 0.8733064995562728,
"eval_runtime": 512.5227,
"eval_samples_per_second": 5.631,
"eval_steps_per_second": 0.178,
"learning_rate": 0.0001,
"step": 19383
},
{
"epoch": 71.42857142857143,
"grad_norm": 0.32921841740608215,
"learning_rate": 0.0001,
"loss": 0.1224,
"step": 19500
},
{
"epoch": 72.0,
"eval_accuracy": 0.30561330561330563,
"eval_f1_macro": 0.7181217472368024,
"eval_f1_micro": 0.8177146438270315,
"eval_loss": 0.12224896252155304,
"eval_roc_auc": 0.8780131460200304,
"eval_runtime": 526.8353,
"eval_samples_per_second": 5.478,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 19656
},
{
"epoch": 73.0,
"eval_accuracy": 0.3076923076923077,
"eval_f1_macro": 0.7046690012290543,
"eval_f1_micro": 0.8161570403926011,
"eval_loss": 0.12214501202106476,
"eval_roc_auc": 0.8759937448960649,
"eval_runtime": 523.4325,
"eval_samples_per_second": 5.514,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 19929
},
{
"epoch": 73.26007326007326,
"grad_norm": 0.27500712871551514,
"learning_rate": 0.0001,
"loss": 0.122,
"step": 20000
},
{
"epoch": 74.0,
"eval_accuracy": 0.2972972972972973,
"eval_f1_macro": 0.7070482653980339,
"eval_f1_micro": 0.8147835269271382,
"eval_loss": 0.12297073751688004,
"eval_roc_auc": 0.8731965201490751,
"eval_runtime": 521.6748,
"eval_samples_per_second": 5.532,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 20202
},
{
"epoch": 75.0,
"eval_accuracy": 0.3049203049203049,
"eval_f1_macro": 0.7123584497861349,
"eval_f1_micro": 0.8175831550689987,
"eval_loss": 0.12141965329647064,
"eval_roc_auc": 0.876778409536002,
"eval_runtime": 521.9582,
"eval_samples_per_second": 5.529,
"eval_steps_per_second": 0.174,
"learning_rate": 1e-05,
"step": 20475
},
{
"epoch": 75.0915750915751,
"grad_norm": 0.34586936235427856,
"learning_rate": 1e-05,
"loss": 0.1201,
"step": 20500
},
{
"epoch": 76.0,
"eval_accuracy": 0.30665280665280664,
"eval_f1_macro": 0.7265282519195887,
"eval_f1_micro": 0.8212704324436167,
"eval_loss": 0.12091591954231262,
"eval_roc_auc": 0.8828403151052873,
"eval_runtime": 515.687,
"eval_samples_per_second": 5.596,
"eval_steps_per_second": 0.176,
"learning_rate": 1e-05,
"step": 20748
},
{
"epoch": 76.92307692307692,
"grad_norm": 0.3650946617126465,
"learning_rate": 1e-05,
"loss": 0.1192,
"step": 21000
},
{
"epoch": 77.0,
"eval_accuracy": 0.30734580734580735,
"eval_f1_macro": 0.7249141687532618,
"eval_f1_micro": 0.8221009885557243,
"eval_loss": 0.12162773311138153,
"eval_roc_auc": 0.88597146196019,
"eval_runtime": 505.8066,
"eval_samples_per_second": 5.706,
"eval_steps_per_second": 0.18,
"learning_rate": 1e-05,
"step": 21021
},
{
"epoch": 78.0,
"eval_accuracy": 0.30561330561330563,
"eval_f1_macro": 0.7232913822219021,
"eval_f1_micro": 0.821013443640124,
"eval_loss": 0.12114103883504868,
"eval_roc_auc": 0.8828214151193448,
"eval_runtime": 515.423,
"eval_samples_per_second": 5.599,
"eval_steps_per_second": 0.177,
"learning_rate": 1e-05,
"step": 21294
},
{
"epoch": 78.75457875457876,
"grad_norm": 0.3805921673774719,
"learning_rate": 1e-05,
"loss": 0.1178,
"step": 21500
},
{
"epoch": 79.0,
"eval_accuracy": 0.30561330561330563,
"eval_f1_macro": 0.7157592534107864,
"eval_f1_micro": 0.8181284095677717,
"eval_loss": 0.1210767850279808,
"eval_roc_auc": 0.8769422854254683,
"eval_runtime": 524.7026,
"eval_samples_per_second": 5.5,
"eval_steps_per_second": 0.173,
"learning_rate": 1e-05,
"step": 21567
},
{
"epoch": 80.0,
"eval_accuracy": 0.3090783090783091,
"eval_f1_macro": 0.7196736600383237,
"eval_f1_micro": 0.8200463116109824,
"eval_loss": 0.12099559605121613,
"eval_roc_auc": 0.8823936101146178,
"eval_runtime": 518.5996,
"eval_samples_per_second": 5.565,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 21840
},
{
"epoch": 80.58608058608058,
"grad_norm": 0.38496658205986023,
"learning_rate": 1e-05,
"loss": 0.1178,
"step": 22000
},
{
"epoch": 81.0,
"eval_accuracy": 0.31046431046431044,
"eval_f1_macro": 0.7194056763702963,
"eval_f1_micro": 0.8189727287937092,
"eval_loss": 0.12053155153989792,
"eval_roc_auc": 0.8783734261636972,
"eval_runtime": 517.5249,
"eval_samples_per_second": 5.577,
"eval_steps_per_second": 0.176,
"learning_rate": 1e-05,
"step": 22113
},
{
"epoch": 82.0,
"eval_accuracy": 0.306999306999307,
"eval_f1_macro": 0.7212694332008583,
"eval_f1_micro": 0.8186875235267054,
"eval_loss": 0.12050338089466095,
"eval_roc_auc": 0.8782284502601733,
"eval_runtime": 511.5713,
"eval_samples_per_second": 5.641,
"eval_steps_per_second": 0.178,
"learning_rate": 1e-05,
"step": 22386
},
{
"epoch": 82.41758241758242,
"grad_norm": 0.29807013273239136,
"learning_rate": 1e-05,
"loss": 0.1162,
"step": 22500
},
{
"epoch": 83.0,
"eval_accuracy": 0.3049203049203049,
"eval_f1_macro": 0.7136069207682542,
"eval_f1_micro": 0.817129142279675,
"eval_loss": 0.12153622508049011,
"eval_roc_auc": 0.8753921914755026,
"eval_runtime": 514.5554,
"eval_samples_per_second": 5.609,
"eval_steps_per_second": 0.177,
"learning_rate": 1e-05,
"step": 22659
},
{
"epoch": 84.0,
"eval_accuracy": 0.3115038115038115,
"eval_f1_macro": 0.72263281374496,
"eval_f1_micro": 0.8212135055442501,
"eval_loss": 0.12091034650802612,
"eval_roc_auc": 0.8817381602117871,
"eval_runtime": 514.2801,
"eval_samples_per_second": 5.612,
"eval_steps_per_second": 0.177,
"learning_rate": 1e-05,
"step": 22932
},
{
"epoch": 84.24908424908425,
"grad_norm": 0.4926730692386627,
"learning_rate": 1e-05,
"loss": 0.1174,
"step": 23000
},
{
"epoch": 85.0,
"eval_accuracy": 0.30942480942480943,
"eval_f1_macro": 0.7219026145386024,
"eval_f1_micro": 0.8212908842183808,
"eval_loss": 0.12058679759502411,
"eval_roc_auc": 0.8823288887291161,
"eval_runtime": 513.2258,
"eval_samples_per_second": 5.623,
"eval_steps_per_second": 0.177,
"learning_rate": 1e-05,
"step": 23205
},
{
"epoch": 86.0,
"eval_accuracy": 0.30838530838530837,
"eval_f1_macro": 0.7255503995321377,
"eval_f1_micro": 0.8206727371003285,
"eval_loss": 0.1210218220949173,
"eval_roc_auc": 0.8810894976708349,
"eval_runtime": 516.6336,
"eval_samples_per_second": 5.586,
"eval_steps_per_second": 0.176,
"learning_rate": 1e-05,
"step": 23478
},
{
"epoch": 86.08058608058609,
"grad_norm": 0.3941400647163391,
"learning_rate": 1e-05,
"loss": 0.1167,
"step": 23500
},
{
"epoch": 87.0,
"eval_accuracy": 0.30734580734580735,
"eval_f1_macro": 0.7163464112504625,
"eval_f1_micro": 0.81919187715867,
"eval_loss": 0.12097787857055664,
"eval_roc_auc": 0.8800195357981024,
"eval_runtime": 514.309,
"eval_samples_per_second": 5.611,
"eval_steps_per_second": 0.177,
"learning_rate": 1e-05,
"step": 23751
},
{
"epoch": 87.91208791208791,
"grad_norm": 0.30446189641952515,
"learning_rate": 1e-05,
"loss": 0.116,
"step": 24000
},
{
"epoch": 88.0,
"eval_accuracy": 0.30942480942480943,
"eval_f1_macro": 0.7179611359738045,
"eval_f1_micro": 0.8219223445649475,
"eval_loss": 0.12078534066677094,
"eval_roc_auc": 0.8831166385335435,
"eval_runtime": 513.9044,
"eval_samples_per_second": 5.616,
"eval_steps_per_second": 0.177,
"learning_rate": 1e-05,
"step": 24024
},
{
"epoch": 89.0,
"eval_accuracy": 0.3125433125433125,
"eval_f1_macro": 0.7293063087262872,
"eval_f1_micro": 0.8235824319895118,
"eval_loss": 0.1213160827755928,
"eval_roc_auc": 0.8871674997505042,
"eval_runtime": 514.1086,
"eval_samples_per_second": 5.614,
"eval_steps_per_second": 0.177,
"learning_rate": 1.0000000000000002e-06,
"step": 24297
},
{
"epoch": 89.74358974358974,
"grad_norm": 0.2996889054775238,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1161,
"step": 24500
},
{
"epoch": 90.0,
"eval_accuracy": 0.3108108108108108,
"eval_f1_macro": 0.7249894355418997,
"eval_f1_micro": 0.8228019165403988,
"eval_loss": 0.12110408395528793,
"eval_roc_auc": 0.8868651536304606,
"eval_runtime": 511.9928,
"eval_samples_per_second": 5.637,
"eval_steps_per_second": 0.178,
"learning_rate": 1.0000000000000002e-06,
"step": 24570
},
{
"epoch": 91.0,
"eval_accuracy": 0.31046431046431044,
"eval_f1_macro": 0.7187027508297176,
"eval_f1_micro": 0.8191074795725959,
"eval_loss": 0.1205781027674675,
"eval_roc_auc": 0.8779146622039986,
"eval_runtime": 513.1929,
"eval_samples_per_second": 5.624,
"eval_steps_per_second": 0.177,
"learning_rate": 1.0000000000000002e-06,
"step": 24843
},
{
"epoch": 91.57509157509158,
"grad_norm": 0.30445897579193115,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1162,
"step": 25000
},
{
"epoch": 92.0,
"eval_accuracy": 0.31046431046431044,
"eval_f1_macro": 0.7150284118631205,
"eval_f1_micro": 0.8196009683612989,
"eval_loss": 0.12076584249734879,
"eval_roc_auc": 0.8793405313350767,
"eval_runtime": 520.7671,
"eval_samples_per_second": 5.542,
"eval_steps_per_second": 0.175,
"learning_rate": 1.0000000000000002e-06,
"step": 25116
},
{
"epoch": 92.0,
"learning_rate": 1.0000000000000002e-06,
"step": 25116,
"total_flos": 1.1890234809282512e+21,
"train_loss": 0.1360613288991788,
"train_runtime": 194834.2342,
"train_samples_per_second": 6.71,
"train_steps_per_second": 0.21
}
],
"logging_steps": 500,
"max_steps": 40950,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1890234809282512e+21,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}