fullstop-welsh-punctuation-prediction / model_final_suite_results_task2.json
DewiBrynJones's picture
hyfforddi gyda rhagor o ddata / train with more data
7578b01
raw
history blame
86.1 kB
{
"tests": {
"22": {
"id": 22,
"task": 2,
"model": "xlm-roberta-large",
"languages": [
"cy"
],
"augmentation": [
""
],
"data_percentage": 1,
"use_token_type_ids": false,
"tokenizer_config": {
"strip_accent": false,
"add_prefix_space": true
},
"opimizer_config": {
"adafactor": true,
"num_train_epochs": 2
},
"result": [
{
"loss": 1.882,
"grad_norm": Infinity,
"learning_rate": 0.0,
"epoch": 0.0,
"step": 1
},
{
"loss": 0.5285,
"grad_norm": 2.4953722953796387,
"learning_rate": 3.99453087019932e-05,
"epoch": 0.01,
"step": 100
},
{
"loss": 0.1702,
"grad_norm": 1.8322360515594482,
"learning_rate": 3.982377248420029e-05,
"epoch": 0.01,
"step": 200
},
{
"loss": 0.1472,
"grad_norm": 1.7121275663375854,
"learning_rate": 3.970223626640739e-05,
"epoch": 0.02,
"step": 300
},
{
"loss": 0.1342,
"grad_norm": 1.7097556591033936,
"learning_rate": 3.958070004861449e-05,
"epoch": 0.02,
"step": 400
},
{
"loss": 0.1288,
"grad_norm": 1.57424795627594,
"learning_rate": 3.9459163830821586e-05,
"epoch": 0.03,
"step": 500
},
{
"loss": 0.1247,
"grad_norm": 1.7552311420440674,
"learning_rate": 3.9337627613028686e-05,
"epoch": 0.04,
"step": 600
},
{
"loss": 0.1218,
"grad_norm": 1.6224812269210815,
"learning_rate": 3.9216091395235786e-05,
"epoch": 0.04,
"step": 700
},
{
"loss": 0.1176,
"grad_norm": 1.8368713855743408,
"learning_rate": 3.909455517744288e-05,
"epoch": 0.05,
"step": 800
},
{
"loss": 0.1119,
"grad_norm": 1.4631482362747192,
"learning_rate": 3.897301895964998e-05,
"epoch": 0.05,
"step": 900
},
{
"loss": 0.1098,
"grad_norm": 1.2774118185043335,
"learning_rate": 3.885148274185708e-05,
"epoch": 0.06,
"step": 1000
},
{
"loss": 0.1083,
"grad_norm": 1.187245488166809,
"learning_rate": 3.872994652406417e-05,
"epoch": 0.07,
"step": 1100
},
{
"loss": 0.1075,
"grad_norm": 1.6492900848388672,
"learning_rate": 3.860841030627127e-05,
"epoch": 0.07,
"step": 1200
},
{
"loss": 0.107,
"grad_norm": 1.4514034986495972,
"learning_rate": 3.8486874088478366e-05,
"epoch": 0.08,
"step": 1300
},
{
"loss": 0.1036,
"grad_norm": 1.0488823652267456,
"learning_rate": 3.8365337870685466e-05,
"epoch": 0.08,
"step": 1400
},
{
"loss": 0.1021,
"grad_norm": 1.5489355325698853,
"learning_rate": 3.8243801652892566e-05,
"epoch": 0.09,
"step": 1500
},
{
"loss": 0.1008,
"grad_norm": 1.2730894088745117,
"learning_rate": 3.812226543509966e-05,
"epoch": 0.1,
"step": 1600
},
{
"loss": 0.1004,
"grad_norm": 1.6920459270477295,
"learning_rate": 3.800072921730676e-05,
"epoch": 0.1,
"step": 1700
},
{
"loss": 0.1006,
"grad_norm": 0.9863981008529663,
"learning_rate": 3.787919299951386e-05,
"epoch": 0.11,
"step": 1800
},
{
"loss": 0.0982,
"grad_norm": 0.9981995820999146,
"learning_rate": 3.775765678172095e-05,
"epoch": 0.12,
"step": 1900
},
{
"loss": 0.0975,
"grad_norm": 1.021620273590088,
"learning_rate": 3.763612056392805e-05,
"epoch": 0.12,
"step": 2000
},
{
"loss": 0.0989,
"grad_norm": 1.2811397314071655,
"learning_rate": 3.751458434613515e-05,
"epoch": 0.13,
"step": 2100
},
{
"loss": 0.0959,
"grad_norm": 1.5976190567016602,
"learning_rate": 3.7393048128342246e-05,
"epoch": 0.13,
"step": 2200
},
{
"loss": 0.0961,
"grad_norm": 0.9754481911659241,
"learning_rate": 3.7271511910549346e-05,
"epoch": 0.14,
"step": 2300
},
{
"loss": 0.0956,
"grad_norm": 0.9418678283691406,
"learning_rate": 3.7149975692756447e-05,
"epoch": 0.15,
"step": 2400
},
{
"loss": 0.0954,
"grad_norm": 1.294745922088623,
"learning_rate": 3.702843947496354e-05,
"epoch": 0.15,
"step": 2500
},
{
"loss": 0.0943,
"grad_norm": 1.3049461841583252,
"learning_rate": 3.690690325717064e-05,
"epoch": 0.16,
"step": 2600
},
{
"loss": 0.0936,
"grad_norm": 1.1144427061080933,
"learning_rate": 3.678536703937774e-05,
"epoch": 0.16,
"step": 2700
},
{
"loss": 0.0939,
"grad_norm": 1.3424856662750244,
"learning_rate": 3.666383082158483e-05,
"epoch": 0.17,
"step": 2800
},
{
"loss": 0.0947,
"grad_norm": 1.123299241065979,
"learning_rate": 3.6542294603791933e-05,
"epoch": 0.18,
"step": 2900
},
{
"loss": 0.0932,
"grad_norm": 1.456009864807129,
"learning_rate": 3.642075838599903e-05,
"epoch": 0.18,
"step": 3000
},
{
"loss": 0.0927,
"grad_norm": 1.4363266229629517,
"learning_rate": 3.629922216820613e-05,
"epoch": 0.19,
"step": 3100
},
{
"loss": 0.0907,
"grad_norm": 0.7776892185211182,
"learning_rate": 3.617768595041323e-05,
"epoch": 0.19,
"step": 3200
},
{
"loss": 0.092,
"grad_norm": 25.731966018676758,
"learning_rate": 3.605614973262032e-05,
"epoch": 0.2,
"step": 3300
},
{
"loss": 0.091,
"grad_norm": 0.9259088039398193,
"learning_rate": 3.593461351482742e-05,
"epoch": 0.21,
"step": 3400
},
{
"loss": 0.0915,
"grad_norm": 0.851094663143158,
"learning_rate": 3.581307729703452e-05,
"epoch": 0.21,
"step": 3500
},
{
"loss": 0.0902,
"grad_norm": 1.5700650215148926,
"learning_rate": 3.5691541079241614e-05,
"epoch": 0.22,
"step": 3600
},
{
"loss": 0.0888,
"grad_norm": 1.13387930393219,
"learning_rate": 3.5570004861448714e-05,
"epoch": 0.22,
"step": 3700
},
{
"loss": 0.089,
"grad_norm": 1.2357937097549438,
"learning_rate": 3.5448468643655814e-05,
"epoch": 0.23,
"step": 3800
},
{
"loss": 0.0898,
"grad_norm": 0.9063655734062195,
"learning_rate": 3.532693242586291e-05,
"epoch": 0.24,
"step": 3900
},
{
"loss": 0.0893,
"grad_norm": 1.1259723901748657,
"learning_rate": 3.520539620807001e-05,
"epoch": 0.24,
"step": 4000
},
{
"loss": 0.0889,
"grad_norm": 0.8327601552009583,
"learning_rate": 3.508385999027711e-05,
"epoch": 0.25,
"step": 4100
},
{
"loss": 0.0862,
"grad_norm": 1.2368316650390625,
"learning_rate": 3.49623237724842e-05,
"epoch": 0.25,
"step": 4200
},
{
"loss": 0.0867,
"grad_norm": 1.1474043130874634,
"learning_rate": 3.48407875546913e-05,
"epoch": 0.26,
"step": 4300
},
{
"loss": 0.0858,
"grad_norm": 0.6887868046760559,
"learning_rate": 3.47192513368984e-05,
"epoch": 0.27,
"step": 4400
},
{
"loss": 0.0877,
"grad_norm": 0.8170347809791565,
"learning_rate": 3.4597715119105494e-05,
"epoch": 0.27,
"step": 4500
},
{
"loss": 0.0871,
"grad_norm": 0.7361243367195129,
"learning_rate": 3.4476178901312594e-05,
"epoch": 0.28,
"step": 4600
},
{
"loss": 0.0878,
"grad_norm": 1.0975162982940674,
"learning_rate": 3.435464268351969e-05,
"epoch": 0.29,
"step": 4700
},
{
"loss": 0.0863,
"grad_norm": 0.931176483631134,
"learning_rate": 3.4233106465726794e-05,
"epoch": 0.29,
"step": 4800
},
{
"loss": 0.0853,
"grad_norm": 1.0259523391723633,
"learning_rate": 3.411157024793389e-05,
"epoch": 0.3,
"step": 4900
},
{
"loss": 0.0876,
"grad_norm": 1.1680504083633423,
"learning_rate": 3.399003403014098e-05,
"epoch": 0.3,
"step": 5000
},
{
"loss": 0.0855,
"grad_norm": 1.2358198165893555,
"learning_rate": 3.386849781234809e-05,
"epoch": 0.31,
"step": 5100
},
{
"loss": 0.085,
"grad_norm": 0.8484376668930054,
"learning_rate": 3.374696159455518e-05,
"epoch": 0.32,
"step": 5200
},
{
"loss": 0.085,
"grad_norm": 1.5419291257858276,
"learning_rate": 3.3625425376762274e-05,
"epoch": 0.32,
"step": 5300
},
{
"loss": 0.0849,
"grad_norm": 1.0334900617599487,
"learning_rate": 3.3503889158969374e-05,
"epoch": 0.33,
"step": 5400
},
{
"loss": 0.0854,
"grad_norm": 1.0367408990859985,
"learning_rate": 3.3382352941176474e-05,
"epoch": 0.33,
"step": 5500
},
{
"loss": 0.0853,
"grad_norm": 0.8429509401321411,
"learning_rate": 3.326081672338357e-05,
"epoch": 0.34,
"step": 5600
},
{
"loss": 0.086,
"grad_norm": 0.9059005379676819,
"learning_rate": 3.313928050559067e-05,
"epoch": 0.35,
"step": 5700
},
{
"loss": 0.0846,
"grad_norm": 1.1803362369537354,
"learning_rate": 3.301774428779777e-05,
"epoch": 0.35,
"step": 5800
},
{
"loss": 0.0817,
"grad_norm": 0.7263641357421875,
"learning_rate": 3.289620807000487e-05,
"epoch": 0.36,
"step": 5900
},
{
"loss": 0.0831,
"grad_norm": 0.8227238655090332,
"learning_rate": 3.277467185221196e-05,
"epoch": 0.36,
"step": 6000
},
{
"loss": 0.0839,
"grad_norm": 1.0349544286727905,
"learning_rate": 3.2653135634419055e-05,
"epoch": 0.37,
"step": 6100
},
{
"loss": 0.0827,
"grad_norm": 0.8446714282035828,
"learning_rate": 3.253159941662616e-05,
"epoch": 0.38,
"step": 6200
},
{
"loss": 0.082,
"grad_norm": 1.1419836282730103,
"learning_rate": 3.2410063198833255e-05,
"epoch": 0.38,
"step": 6300
},
{
"loss": 0.0812,
"grad_norm": 0.9505990147590637,
"learning_rate": 3.228852698104035e-05,
"epoch": 0.39,
"step": 6400
},
{
"loss": 0.0806,
"grad_norm": 1.0036993026733398,
"learning_rate": 3.2166990763247455e-05,
"epoch": 0.39,
"step": 6500
},
{
"loss": 0.0819,
"grad_norm": 0.7694116234779358,
"learning_rate": 3.204545454545455e-05,
"epoch": 0.4,
"step": 6600
},
{
"loss": 0.0818,
"grad_norm": 0.7389699220657349,
"learning_rate": 3.192391832766165e-05,
"epoch": 0.41,
"step": 6700
},
{
"loss": 0.0829,
"grad_norm": 0.8264873623847961,
"learning_rate": 3.180238210986874e-05,
"epoch": 0.41,
"step": 6800
},
{
"loss": 0.0849,
"grad_norm": 0.8844084143638611,
"learning_rate": 3.168084589207584e-05,
"epoch": 0.42,
"step": 6900
},
{
"loss": 0.0816,
"grad_norm": 0.8728023171424866,
"learning_rate": 3.155930967428294e-05,
"epoch": 0.42,
"step": 7000
},
{
"loss": 0.0799,
"grad_norm": 1.218404769897461,
"learning_rate": 3.1437773456490035e-05,
"epoch": 0.43,
"step": 7100
},
{
"loss": 0.0797,
"grad_norm": 0.7085688710212708,
"learning_rate": 3.1316237238697135e-05,
"epoch": 0.44,
"step": 7200
},
{
"loss": 0.0795,
"grad_norm": 0.8446517586708069,
"learning_rate": 3.1194701020904235e-05,
"epoch": 0.44,
"step": 7300
},
{
"loss": 0.0817,
"grad_norm": 1.3226453065872192,
"learning_rate": 3.107316480311133e-05,
"epoch": 0.45,
"step": 7400
},
{
"loss": 0.0816,
"grad_norm": 0.7685155868530273,
"learning_rate": 3.095162858531843e-05,
"epoch": 0.46,
"step": 7500
},
{
"loss": 0.0806,
"grad_norm": 0.7135798335075378,
"learning_rate": 3.083009236752553e-05,
"epoch": 0.46,
"step": 7600
},
{
"loss": 0.0795,
"grad_norm": 1.0276037454605103,
"learning_rate": 3.070855614973262e-05,
"epoch": 0.47,
"step": 7700
},
{
"loss": 0.081,
"grad_norm": 1.1788092851638794,
"learning_rate": 3.058701993193972e-05,
"epoch": 0.47,
"step": 7800
},
{
"loss": 0.0791,
"grad_norm": 1.0305782556533813,
"learning_rate": 3.046548371414682e-05,
"epoch": 0.48,
"step": 7900
},
{
"loss": 0.0805,
"grad_norm": 1.4414223432540894,
"learning_rate": 3.0343947496353915e-05,
"epoch": 0.49,
"step": 8000
},
{
"loss": 0.0799,
"grad_norm": 0.8137165904045105,
"learning_rate": 3.0222411278561012e-05,
"epoch": 0.49,
"step": 8100
},
{
"loss": 0.08,
"grad_norm": 1.1238079071044922,
"learning_rate": 3.0100875060768112e-05,
"epoch": 0.5,
"step": 8200
},
{
"loss": 0.0792,
"grad_norm": 0.9724037647247314,
"learning_rate": 2.997933884297521e-05,
"epoch": 0.5,
"step": 8300
},
{
"loss": 0.0793,
"grad_norm": 1.0247116088867188,
"learning_rate": 2.9857802625182306e-05,
"epoch": 0.51,
"step": 8400
},
{
"loss": 0.0783,
"grad_norm": 1.454062581062317,
"learning_rate": 2.9737481769567335e-05,
"epoch": 0.52,
"step": 8500
},
{
"loss": 0.0788,
"grad_norm": 0.7570217251777649,
"learning_rate": 2.961594555177443e-05,
"epoch": 0.52,
"step": 8600
},
{
"loss": 0.0768,
"grad_norm": 1.1738083362579346,
"learning_rate": 2.9494409333981528e-05,
"epoch": 0.53,
"step": 8700
},
{
"loss": 0.0778,
"grad_norm": 0.7776427268981934,
"learning_rate": 2.9372873116188625e-05,
"epoch": 0.53,
"step": 8800
},
{
"loss": 0.0763,
"grad_norm": 1.226198673248291,
"learning_rate": 2.9251336898395725e-05,
"epoch": 0.54,
"step": 8900
},
{
"loss": 0.0761,
"grad_norm": 0.8859773874282837,
"learning_rate": 2.912980068060282e-05,
"epoch": 0.55,
"step": 9000
},
{
"loss": 0.0765,
"grad_norm": 1.0220259428024292,
"learning_rate": 2.9008264462809918e-05,
"epoch": 0.55,
"step": 9100
},
{
"loss": 0.0777,
"grad_norm": 1.0430243015289307,
"learning_rate": 2.888672824501702e-05,
"epoch": 0.56,
"step": 9200
},
{
"loss": 0.0775,
"grad_norm": 1.1380356550216675,
"learning_rate": 2.8765192027224115e-05,
"epoch": 0.56,
"step": 9300
},
{
"loss": 0.0775,
"grad_norm": 0.6778531670570374,
"learning_rate": 2.8643655809431212e-05,
"epoch": 0.57,
"step": 9400
},
{
"loss": 0.0782,
"grad_norm": 1.0413175821304321,
"learning_rate": 2.852211959163831e-05,
"epoch": 0.58,
"step": 9500
},
{
"loss": 0.0791,
"grad_norm": 1.1399835348129272,
"learning_rate": 2.840058337384541e-05,
"epoch": 0.58,
"step": 9600
},
{
"loss": 0.0763,
"grad_norm": 0.968399703502655,
"learning_rate": 2.8279047156052505e-05,
"epoch": 0.59,
"step": 9700
},
{
"loss": 0.0763,
"grad_norm": 1.0254497528076172,
"learning_rate": 2.8157510938259602e-05,
"epoch": 0.59,
"step": 9800
},
{
"loss": 0.0771,
"grad_norm": 0.8642473220825195,
"learning_rate": 2.8035974720466702e-05,
"epoch": 0.6,
"step": 9900
},
{
"loss": 0.0772,
"grad_norm": 1.1130231618881226,
"learning_rate": 2.79144385026738e-05,
"epoch": 0.61,
"step": 10000
},
{
"loss": 0.0793,
"grad_norm": 1.4455962181091309,
"learning_rate": 2.7792902284880895e-05,
"epoch": 0.61,
"step": 10100
},
{
"loss": 0.077,
"grad_norm": 0.9273576736450195,
"learning_rate": 2.7671366067087992e-05,
"epoch": 0.62,
"step": 10200
},
{
"loss": 0.0766,
"grad_norm": 0.8223456740379333,
"learning_rate": 2.7549829849295092e-05,
"epoch": 0.62,
"step": 10300
},
{
"loss": 0.0765,
"grad_norm": 1.1068949699401855,
"learning_rate": 2.742829363150219e-05,
"epoch": 0.63,
"step": 10400
},
{
"loss": 0.0762,
"grad_norm": 1.0787135362625122,
"learning_rate": 2.7306757413709285e-05,
"epoch": 0.64,
"step": 10500
},
{
"loss": 0.0765,
"grad_norm": 0.6019480228424072,
"learning_rate": 2.7185221195916386e-05,
"epoch": 0.64,
"step": 10600
},
{
"loss": 0.0756,
"grad_norm": 0.7752580046653748,
"learning_rate": 2.7063684978123482e-05,
"epoch": 0.65,
"step": 10700
},
{
"loss": 0.0762,
"grad_norm": 0.9023341536521912,
"learning_rate": 2.6943364122508508e-05,
"epoch": 0.66,
"step": 10800
},
{
"loss": 0.0759,
"grad_norm": 1.1154266595840454,
"learning_rate": 2.6821827904715608e-05,
"epoch": 0.66,
"step": 10900
},
{
"loss": 0.0752,
"grad_norm": 1.5197564363479614,
"learning_rate": 2.6700291686922705e-05,
"epoch": 0.67,
"step": 11000
},
{
"loss": 0.0757,
"grad_norm": 0.8111494183540344,
"learning_rate": 2.65787554691298e-05,
"epoch": 0.67,
"step": 11100
},
{
"loss": 0.0749,
"grad_norm": 0.6413083076477051,
"learning_rate": 2.6457219251336898e-05,
"epoch": 0.68,
"step": 11200
},
{
"loss": 0.0754,
"grad_norm": 0.8996323943138123,
"learning_rate": 2.6335683033544e-05,
"epoch": 0.69,
"step": 11300
},
{
"loss": 0.0744,
"grad_norm": 0.7931196093559265,
"learning_rate": 2.6214146815751095e-05,
"epoch": 0.69,
"step": 11400
},
{
"loss": 0.0742,
"grad_norm": 1.0821586847305298,
"learning_rate": 2.609261059795819e-05,
"epoch": 0.7,
"step": 11500
},
{
"loss": 0.0722,
"grad_norm": 0.9964590072631836,
"learning_rate": 2.5971074380165292e-05,
"epoch": 0.7,
"step": 11600
},
{
"loss": 0.0752,
"grad_norm": 0.7918893694877625,
"learning_rate": 2.584953816237239e-05,
"epoch": 0.71,
"step": 11700
},
{
"loss": 0.0734,
"grad_norm": 0.6565855145454407,
"learning_rate": 2.5728001944579485e-05,
"epoch": 0.72,
"step": 11800
},
{
"loss": 0.0717,
"grad_norm": 1.9885566234588623,
"learning_rate": 2.5606465726786582e-05,
"epoch": 0.72,
"step": 11900
},
{
"loss": 0.0747,
"grad_norm": 0.6101750135421753,
"learning_rate": 2.5484929508993682e-05,
"epoch": 0.73,
"step": 12000
},
{
"loss": 0.073,
"grad_norm": 1.001930594444275,
"learning_rate": 2.536339329120078e-05,
"epoch": 0.73,
"step": 12100
},
{
"loss": 0.074,
"grad_norm": 0.880673348903656,
"learning_rate": 2.5241857073407875e-05,
"epoch": 0.74,
"step": 12200
},
{
"loss": 0.0738,
"grad_norm": 0.7980429530143738,
"learning_rate": 2.5120320855614975e-05,
"epoch": 0.75,
"step": 12300
},
{
"loss": 0.0758,
"grad_norm": 1.0153135061264038,
"learning_rate": 2.4998784637822072e-05,
"epoch": 0.75,
"step": 12400
},
{
"loss": 0.0742,
"grad_norm": 0.8344822525978088,
"learning_rate": 2.487724842002917e-05,
"epoch": 0.76,
"step": 12500
},
{
"loss": 0.0738,
"grad_norm": 0.6752304434776306,
"learning_rate": 2.4755712202236272e-05,
"epoch": 0.76,
"step": 12600
},
{
"loss": 0.0732,
"grad_norm": 1.1106210947036743,
"learning_rate": 2.4634175984443366e-05,
"epoch": 0.77,
"step": 12700
},
{
"loss": 0.0754,
"grad_norm": 0.8022058606147766,
"learning_rate": 2.4512639766650462e-05,
"epoch": 0.78,
"step": 12800
},
{
"loss": 0.0735,
"grad_norm": 0.737308144569397,
"learning_rate": 2.439110354885756e-05,
"epoch": 0.78,
"step": 12900
},
{
"loss": 0.0738,
"grad_norm": 2.094043493270874,
"learning_rate": 2.4269567331064662e-05,
"epoch": 0.79,
"step": 13000
},
{
"loss": 0.072,
"grad_norm": 1.1105279922485352,
"learning_rate": 2.4148031113271756e-05,
"epoch": 0.79,
"step": 13100
},
{
"loss": 0.0716,
"grad_norm": 1.2243571281433105,
"learning_rate": 2.4026494895478852e-05,
"epoch": 0.8,
"step": 13200
},
{
"loss": 0.0718,
"grad_norm": 1.0883300304412842,
"learning_rate": 2.3904958677685956e-05,
"epoch": 0.81,
"step": 13300
},
{
"loss": 0.0727,
"grad_norm": 0.9934273362159729,
"learning_rate": 2.378342245989305e-05,
"epoch": 0.81,
"step": 13400
},
{
"loss": 0.0721,
"grad_norm": 0.7145100831985474,
"learning_rate": 2.3661886242100146e-05,
"epoch": 0.82,
"step": 13500
},
{
"loss": 0.0721,
"grad_norm": 0.8873516321182251,
"learning_rate": 2.3540350024307243e-05,
"epoch": 0.83,
"step": 13600
},
{
"loss": 0.0723,
"grad_norm": 0.7798359990119934,
"learning_rate": 2.3418813806514346e-05,
"epoch": 0.83,
"step": 13700
},
{
"loss": 0.0726,
"grad_norm": 0.9411553740501404,
"learning_rate": 2.329727758872144e-05,
"epoch": 0.84,
"step": 13800
},
{
"loss": 0.0715,
"grad_norm": 0.7994709610939026,
"learning_rate": 2.3175741370928536e-05,
"epoch": 0.84,
"step": 13900
},
{
"loss": 0.0732,
"grad_norm": 0.5489715337753296,
"learning_rate": 2.305420515313564e-05,
"epoch": 0.85,
"step": 14000
},
{
"loss": 0.0699,
"grad_norm": 0.5710996389389038,
"learning_rate": 2.2932668935342736e-05,
"epoch": 0.86,
"step": 14100
},
{
"loss": 0.073,
"grad_norm": 0.7003745436668396,
"learning_rate": 2.281113271754983e-05,
"epoch": 0.86,
"step": 14200
},
{
"loss": 0.0722,
"grad_norm": 0.6743086576461792,
"learning_rate": 2.2689596499756926e-05,
"epoch": 0.87,
"step": 14300
},
{
"loss": 0.0699,
"grad_norm": 0.6730968356132507,
"learning_rate": 2.256806028196403e-05,
"epoch": 0.87,
"step": 14400
},
{
"loss": 0.0719,
"grad_norm": 0.7155641913414001,
"learning_rate": 2.2446524064171126e-05,
"epoch": 0.88,
"step": 14500
},
{
"loss": 0.0708,
"grad_norm": 0.8122462630271912,
"learning_rate": 2.232498784637822e-05,
"epoch": 0.89,
"step": 14600
},
{
"loss": 0.0718,
"grad_norm": 0.8022533655166626,
"learning_rate": 2.2203451628585323e-05,
"epoch": 0.89,
"step": 14700
},
{
"loss": 0.0712,
"grad_norm": 0.545359194278717,
"learning_rate": 2.208191541079242e-05,
"epoch": 0.9,
"step": 14800
},
{
"loss": 0.0711,
"grad_norm": 0.8318025469779968,
"learning_rate": 2.1960379192999513e-05,
"epoch": 0.9,
"step": 14900
},
{
"loss": 0.0706,
"grad_norm": 0.9334779381752014,
"learning_rate": 2.1838842975206616e-05,
"epoch": 0.91,
"step": 15000
},
{
"loss": 0.0701,
"grad_norm": 0.8202875256538391,
"learning_rate": 2.1717306757413713e-05,
"epoch": 0.92,
"step": 15100
},
{
"loss": 0.07,
"grad_norm": 0.8788963556289673,
"learning_rate": 2.159577053962081e-05,
"epoch": 0.92,
"step": 15200
},
{
"loss": 0.0713,
"grad_norm": 1.023823618888855,
"learning_rate": 2.1474234321827903e-05,
"epoch": 0.93,
"step": 15300
},
{
"loss": 0.0697,
"grad_norm": 0.8784018158912659,
"learning_rate": 2.1353913466212936e-05,
"epoch": 0.93,
"step": 15400
},
{
"loss": 0.0695,
"grad_norm": 1.1254814863204956,
"learning_rate": 2.1232377248420032e-05,
"epoch": 0.94,
"step": 15500
},
{
"loss": 0.0697,
"grad_norm": 0.9760749340057373,
"learning_rate": 2.1110841030627126e-05,
"epoch": 0.95,
"step": 15600
},
{
"loss": 0.0709,
"grad_norm": 1.0121357440948486,
"learning_rate": 2.098930481283423e-05,
"epoch": 0.95,
"step": 15700
},
{
"loss": 0.0717,
"grad_norm": 0.7810111045837402,
"learning_rate": 2.0867768595041326e-05,
"epoch": 0.96,
"step": 15800
},
{
"loss": 0.0692,
"grad_norm": 0.6813214421272278,
"learning_rate": 2.074623237724842e-05,
"epoch": 0.96,
"step": 15900
},
{
"loss": 0.0696,
"grad_norm": 0.7685451507568359,
"learning_rate": 2.0624696159455516e-05,
"epoch": 0.97,
"step": 16000
},
{
"loss": 0.0702,
"grad_norm": 3.3225691318511963,
"learning_rate": 2.050315994166262e-05,
"epoch": 0.98,
"step": 16100
},
{
"loss": 0.0702,
"grad_norm": 0.7979671955108643,
"learning_rate": 2.0381623723869716e-05,
"epoch": 0.98,
"step": 16200
},
{
"loss": 0.0691,
"grad_norm": 3.4929583072662354,
"learning_rate": 2.026008750607681e-05,
"epoch": 0.99,
"step": 16300
},
{
"loss": 0.0703,
"grad_norm": 0.7738245725631714,
"learning_rate": 2.0138551288283913e-05,
"epoch": 1.0,
"step": 16400
},
{
"eval_loss": 0.06881729513406754,
"eval_f1": 0.8973916467400326,
"eval_precision": 0.9049522471305407,
"eval_recall": 0.8906029559155776,
"eval_accuracy": 0.9730252863363563,
"eval_runtime": 304.4852,
"eval_samples_per_second": 86.796,
"eval_steps_per_second": 10.851,
"epoch": 1.0,
"step": 16481
},
{
"loss": 0.0684,
"grad_norm": 0.891858696937561,
"learning_rate": 2.001701507049101e-05,
"epoch": 1.0,
"step": 16500
},
{
"loss": 0.0619,
"grad_norm": 0.6408938765525818,
"learning_rate": 1.9895478852698106e-05,
"epoch": 1.01,
"step": 16600
},
{
"loss": 0.0629,
"grad_norm": 0.7390792965888977,
"learning_rate": 1.9773942634905203e-05,
"epoch": 1.01,
"step": 16700
},
{
"loss": 0.0604,
"grad_norm": 0.5206795930862427,
"learning_rate": 1.9652406417112303e-05,
"epoch": 1.02,
"step": 16800
},
{
"loss": 0.0613,
"grad_norm": 0.909116268157959,
"learning_rate": 1.95308701993194e-05,
"epoch": 1.03,
"step": 16900
},
{
"loss": 0.0616,
"grad_norm": 0.8701964020729065,
"learning_rate": 1.9409333981526496e-05,
"epoch": 1.03,
"step": 17000
},
{
"loss": 0.0625,
"grad_norm": 1.0762407779693604,
"learning_rate": 1.9287797763733593e-05,
"epoch": 1.04,
"step": 17100
},
{
"loss": 0.0615,
"grad_norm": 0.7816362380981445,
"learning_rate": 1.9166261545940693e-05,
"epoch": 1.04,
"step": 17200
},
{
"loss": 0.0626,
"grad_norm": 0.6983965039253235,
"learning_rate": 1.904594069032572e-05,
"epoch": 1.05,
"step": 17300
},
{
"loss": 0.0621,
"grad_norm": 0.910698413848877,
"learning_rate": 1.8924404472532816e-05,
"epoch": 1.06,
"step": 17400
},
{
"loss": 0.0631,
"grad_norm": 0.8654133677482605,
"learning_rate": 1.8802868254739916e-05,
"epoch": 1.06,
"step": 17500
},
{
"loss": 0.062,
"grad_norm": 0.8351789712905884,
"learning_rate": 1.8681332036947012e-05,
"epoch": 1.07,
"step": 17600
},
{
"loss": 0.0604,
"grad_norm": 0.7861587405204773,
"learning_rate": 1.855979581915411e-05,
"epoch": 1.07,
"step": 17700
},
{
"loss": 0.0609,
"grad_norm": 0.7295276522636414,
"learning_rate": 1.843825960136121e-05,
"epoch": 1.08,
"step": 17800
},
{
"loss": 0.0616,
"grad_norm": 1.0210868120193481,
"learning_rate": 1.8316723383568306e-05,
"epoch": 1.09,
"step": 17900
},
{
"loss": 0.0616,
"grad_norm": 0.8220874071121216,
"learning_rate": 1.8195187165775403e-05,
"epoch": 1.09,
"step": 18000
},
{
"loss": 0.0607,
"grad_norm": 0.7961727380752563,
"learning_rate": 1.80736509479825e-05,
"epoch": 1.1,
"step": 18100
},
{
"loss": 0.0614,
"grad_norm": 1.0390113592147827,
"learning_rate": 1.79521147301896e-05,
"epoch": 1.1,
"step": 18200
},
{
"loss": 0.0625,
"grad_norm": 0.8423497080802917,
"learning_rate": 1.7830578512396696e-05,
"epoch": 1.11,
"step": 18300
},
{
"loss": 0.0618,
"grad_norm": 0.7576957941055298,
"learning_rate": 1.7709042294603793e-05,
"epoch": 1.12,
"step": 18400
},
{
"loss": 0.061,
"grad_norm": 0.7174555659294128,
"learning_rate": 1.7587506076810893e-05,
"epoch": 1.12,
"step": 18500
},
{
"loss": 0.0602,
"grad_norm": 0.7977816462516785,
"learning_rate": 1.746596985901799e-05,
"epoch": 1.13,
"step": 18600
},
{
"loss": 0.0617,
"grad_norm": 0.8125550150871277,
"learning_rate": 1.7344433641225086e-05,
"epoch": 1.13,
"step": 18700
},
{
"loss": 0.0605,
"grad_norm": 1.3914258480072021,
"learning_rate": 1.7222897423432183e-05,
"epoch": 1.14,
"step": 18800
},
{
"loss": 0.0614,
"grad_norm": 0.8273860812187195,
"learning_rate": 1.7101361205639283e-05,
"epoch": 1.15,
"step": 18900
},
{
"loss": 0.0606,
"grad_norm": 0.7267687916755676,
"learning_rate": 1.697982498784638e-05,
"epoch": 1.15,
"step": 19000
},
{
"loss": 0.0624,
"grad_norm": 1.075861930847168,
"learning_rate": 1.6858288770053476e-05,
"epoch": 1.16,
"step": 19100
},
{
"loss": 0.062,
"grad_norm": 0.867139995098114,
"learning_rate": 1.6736752552260576e-05,
"epoch": 1.16,
"step": 19200
},
{
"loss": 0.0595,
"grad_norm": 0.6730388402938843,
"learning_rate": 1.6615216334467673e-05,
"epoch": 1.17,
"step": 19300
},
{
"loss": 0.0603,
"grad_norm": 0.7329290509223938,
"learning_rate": 1.649368011667477e-05,
"epoch": 1.18,
"step": 19400
},
{
"loss": 0.0605,
"grad_norm": 1.0000228881835938,
"learning_rate": 1.6372143898881866e-05,
"epoch": 1.18,
"step": 19500
},
{
"loss": 0.0599,
"grad_norm": 1.0037493705749512,
"learning_rate": 1.6250607681088967e-05,
"epoch": 1.19,
"step": 19600
},
{
"loss": 0.0616,
"grad_norm": 0.7647894024848938,
"learning_rate": 1.6129071463296063e-05,
"epoch": 1.2,
"step": 19700
},
{
"loss": 0.0604,
"grad_norm": 0.78948575258255,
"learning_rate": 1.600753524550316e-05,
"epoch": 1.2,
"step": 19800
},
{
"loss": 0.0609,
"grad_norm": 0.8443770408630371,
"learning_rate": 1.588599902771026e-05,
"epoch": 1.21,
"step": 19900
},
{
"loss": 0.0599,
"grad_norm": 1.1531789302825928,
"learning_rate": 1.5764462809917357e-05,
"epoch": 1.21,
"step": 20000
},
{
"loss": 0.0605,
"grad_norm": 0.7325319647789001,
"learning_rate": 1.5642926592124453e-05,
"epoch": 1.22,
"step": 20100
},
{
"loss": 0.0606,
"grad_norm": 0.8585038185119629,
"learning_rate": 1.5521390374331553e-05,
"epoch": 1.23,
"step": 20200
},
{
"loss": 0.0602,
"grad_norm": 0.6652311086654663,
"learning_rate": 1.539985415653865e-05,
"epoch": 1.23,
"step": 20300
},
{
"loss": 0.0605,
"grad_norm": 0.9240396618843079,
"learning_rate": 1.5278317938745747e-05,
"epoch": 1.24,
"step": 20400
},
{
"loss": 0.0609,
"grad_norm": 0.9992942214012146,
"learning_rate": 1.5156781720952845e-05,
"epoch": 1.24,
"step": 20500
},
{
"loss": 0.0604,
"grad_norm": 0.7454150915145874,
"learning_rate": 1.5035245503159944e-05,
"epoch": 1.25,
"step": 20600
},
{
"loss": 0.0598,
"grad_norm": 0.8551883101463318,
"learning_rate": 1.491370928536704e-05,
"epoch": 1.26,
"step": 20700
},
{
"loss": 0.061,
"grad_norm": 0.8273564577102661,
"learning_rate": 1.4792173067574139e-05,
"epoch": 1.26,
"step": 20800
},
{
"loss": 0.06,
"grad_norm": 0.925244927406311,
"learning_rate": 1.4671852211959166e-05,
"epoch": 1.27,
"step": 20900
},
{
"loss": 0.0587,
"grad_norm": 0.5892955660820007,
"learning_rate": 1.4550315994166261e-05,
"epoch": 1.27,
"step": 21000
},
{
"loss": 0.0602,
"grad_norm": 0.7904210090637207,
"learning_rate": 1.4428779776373361e-05,
"epoch": 1.28,
"step": 21100
},
{
"loss": 0.0625,
"grad_norm": 1.2804646492004395,
"learning_rate": 1.430724355858046e-05,
"epoch": 1.29,
"step": 21200
},
{
"loss": 0.0607,
"grad_norm": 0.9952909350395203,
"learning_rate": 1.4185707340787556e-05,
"epoch": 1.29,
"step": 21300
},
{
"loss": 0.0602,
"grad_norm": 0.9036094546318054,
"learning_rate": 1.4064171122994655e-05,
"epoch": 1.3,
"step": 21400
},
{
"loss": 0.0594,
"grad_norm": 0.8128438591957092,
"learning_rate": 1.3942634905201751e-05,
"epoch": 1.3,
"step": 21500
},
{
"loss": 0.0593,
"grad_norm": 0.786703884601593,
"learning_rate": 1.382109868740885e-05,
"epoch": 1.31,
"step": 21600
},
{
"loss": 0.0604,
"grad_norm": 1.107258677482605,
"learning_rate": 1.3699562469615946e-05,
"epoch": 1.32,
"step": 21700
},
{
"loss": 0.0596,
"grad_norm": 1.0990906953811646,
"learning_rate": 1.3578026251823045e-05,
"epoch": 1.32,
"step": 21800
},
{
"loss": 0.0611,
"grad_norm": 0.7040949463844299,
"learning_rate": 1.3456490034030143e-05,
"epoch": 1.33,
"step": 21900
},
{
"loss": 0.0582,
"grad_norm": 0.7568740248680115,
"learning_rate": 1.333495381623724e-05,
"epoch": 1.33,
"step": 22000
},
{
"loss": 0.0595,
"grad_norm": 0.6342681646347046,
"learning_rate": 1.3213417598444338e-05,
"epoch": 1.34,
"step": 22100
},
{
"loss": 0.0597,
"grad_norm": 0.7555422186851501,
"learning_rate": 1.3091881380651435e-05,
"epoch": 1.35,
"step": 22200
},
{
"loss": 0.0587,
"grad_norm": 0.8620259165763855,
"learning_rate": 1.2970345162858533e-05,
"epoch": 1.35,
"step": 22300
},
{
"loss": 0.0586,
"grad_norm": 1.4132779836654663,
"learning_rate": 1.2848808945065632e-05,
"epoch": 1.36,
"step": 22400
},
{
"loss": 0.0594,
"grad_norm": 0.9352446794509888,
"learning_rate": 1.2727272727272728e-05,
"epoch": 1.37,
"step": 22500
},
{
"loss": 0.0581,
"grad_norm": 0.8808399438858032,
"learning_rate": 1.2605736509479827e-05,
"epoch": 1.37,
"step": 22600
},
{
"loss": 0.0603,
"grad_norm": 0.8254494071006775,
"learning_rate": 1.2484200291686924e-05,
"epoch": 1.38,
"step": 22700
},
{
"loss": 0.0589,
"grad_norm": 0.9145941138267517,
"learning_rate": 1.2362664073894022e-05,
"epoch": 1.38,
"step": 22800
},
{
"loss": 0.0594,
"grad_norm": 1.267179012298584,
"learning_rate": 1.2241127856101119e-05,
"epoch": 1.39,
"step": 22900
},
{
"loss": 0.0585,
"grad_norm": 0.9012957215309143,
"learning_rate": 1.2119591638308217e-05,
"epoch": 1.4,
"step": 23000
},
{
"loss": 0.0581,
"grad_norm": 1.053276777267456,
"learning_rate": 1.1998055420515315e-05,
"epoch": 1.4,
"step": 23100
},
{
"loss": 0.0579,
"grad_norm": 1.031724214553833,
"learning_rate": 1.1876519202722412e-05,
"epoch": 1.41,
"step": 23200
},
{
"loss": 0.0574,
"grad_norm": 0.8730105757713318,
"learning_rate": 1.175498298492951e-05,
"epoch": 1.41,
"step": 23300
},
{
"loss": 0.0589,
"grad_norm": 0.871724545955658,
"learning_rate": 1.1633446767136607e-05,
"epoch": 1.42,
"step": 23400
},
{
"loss": 0.0585,
"grad_norm": 0.9031744599342346,
"learning_rate": 1.1511910549343706e-05,
"epoch": 1.43,
"step": 23500
},
{
"loss": 0.0586,
"grad_norm": 0.5891318917274475,
"learning_rate": 1.1390374331550802e-05,
"epoch": 1.43,
"step": 23600
},
{
"loss": 0.0584,
"grad_norm": 0.7399836182594299,
"learning_rate": 1.12688381137579e-05,
"epoch": 1.44,
"step": 23700
},
{
"loss": 0.0596,
"grad_norm": 0.47165361046791077,
"learning_rate": 1.1147301895964999e-05,
"epoch": 1.44,
"step": 23800
},
{
"loss": 0.0588,
"grad_norm": 0.8805158734321594,
"learning_rate": 1.1025765678172096e-05,
"epoch": 1.45,
"step": 23900
},
{
"loss": 0.0587,
"grad_norm": 0.6524300575256348,
"learning_rate": 1.0904229460379194e-05,
"epoch": 1.46,
"step": 24000
},
{
"loss": 0.0599,
"grad_norm": 0.7314462661743164,
"learning_rate": 1.078269324258629e-05,
"epoch": 1.46,
"step": 24100
},
{
"loss": 0.0587,
"grad_norm": 0.7969116568565369,
"learning_rate": 1.0661157024793389e-05,
"epoch": 1.47,
"step": 24200
},
{
"loss": 0.0574,
"grad_norm": 0.6548510193824768,
"learning_rate": 1.0539620807000488e-05,
"epoch": 1.47,
"step": 24300
},
{
"loss": 0.0601,
"grad_norm": 0.6944112181663513,
"learning_rate": 1.0418084589207584e-05,
"epoch": 1.48,
"step": 24400
},
{
"loss": 0.0595,
"grad_norm": 1.0091618299484253,
"learning_rate": 1.0296548371414683e-05,
"epoch": 1.49,
"step": 24500
},
{
"loss": 0.0567,
"grad_norm": 0.7692497372627258,
"learning_rate": 1.017501215362178e-05,
"epoch": 1.49,
"step": 24600
},
{
"loss": 0.0567,
"grad_norm": 1.2263282537460327,
"learning_rate": 1.0053475935828878e-05,
"epoch": 1.5,
"step": 24700
},
{
"loss": 0.058,
"grad_norm": 1.412335753440857,
"learning_rate": 9.931939718035976e-06,
"epoch": 1.5,
"step": 24800
},
{
"loss": 0.0584,
"grad_norm": 0.9114163517951965,
"learning_rate": 9.810403500243073e-06,
"epoch": 1.51,
"step": 24900
},
{
"loss": 0.0579,
"grad_norm": 0.8343012928962708,
"learning_rate": 9.688867282450171e-06,
"epoch": 1.52,
"step": 25000
},
{
"loss": 0.0581,
"grad_norm": 0.7137165665626526,
"learning_rate": 9.567331064657268e-06,
"epoch": 1.52,
"step": 25100
},
{
"loss": 0.0572,
"grad_norm": 0.8871126174926758,
"learning_rate": 9.445794846864366e-06,
"epoch": 1.53,
"step": 25200
},
{
"loss": 0.0588,
"grad_norm": 1.9913699626922607,
"learning_rate": 9.324258629071465e-06,
"epoch": 1.54,
"step": 25300
},
{
"loss": 0.0586,
"grad_norm": 0.702129065990448,
"learning_rate": 9.202722411278561e-06,
"epoch": 1.54,
"step": 25400
},
{
"loss": 0.0589,
"grad_norm": 0.759503960609436,
"learning_rate": 9.08118619348566e-06,
"epoch": 1.55,
"step": 25500
},
{
"loss": 0.0598,
"grad_norm": 0.7731884717941284,
"learning_rate": 8.959649975692756e-06,
"epoch": 1.55,
"step": 25600
},
{
"loss": 0.0574,
"grad_norm": 0.830560028553009,
"learning_rate": 8.838113757899855e-06,
"epoch": 1.56,
"step": 25700
},
{
"loss": 0.0561,
"grad_norm": 0.612714946269989,
"learning_rate": 8.716577540106953e-06,
"epoch": 1.57,
"step": 25800
},
{
"loss": 0.0583,
"grad_norm": 0.6476453542709351,
"learning_rate": 8.59504132231405e-06,
"epoch": 1.57,
"step": 25900
},
{
"loss": 0.0567,
"grad_norm": 0.6660561561584473,
"learning_rate": 8.473505104521148e-06,
"epoch": 1.58,
"step": 26000
},
{
"loss": 0.0575,
"grad_norm": 0.6638226509094238,
"learning_rate": 8.351968886728245e-06,
"epoch": 1.58,
"step": 26100
},
{
"loss": 0.0567,
"grad_norm": 0.6452857255935669,
"learning_rate": 8.231648031113272e-06,
"epoch": 1.59,
"step": 26200
},
{
"loss": 0.0567,
"grad_norm": 0.819333016872406,
"learning_rate": 8.11011181332037e-06,
"epoch": 1.6,
"step": 26300
},
{
"loss": 0.0571,
"grad_norm": 1.2114768028259277,
"learning_rate": 7.988575595527467e-06,
"epoch": 1.6,
"step": 26400
},
{
"loss": 0.0577,
"grad_norm": 0.7581117153167725,
"learning_rate": 7.867039377734566e-06,
"epoch": 1.61,
"step": 26500
},
{
"loss": 0.0575,
"grad_norm": 0.5861278772354126,
"learning_rate": 7.745503159941663e-06,
"epoch": 1.61,
"step": 26600
},
{
"loss": 0.0567,
"grad_norm": 0.7154746055603027,
"learning_rate": 7.623966942148761e-06,
"epoch": 1.62,
"step": 26700
},
{
"loss": 0.0574,
"grad_norm": 1.072407841682434,
"learning_rate": 7.502430724355859e-06,
"epoch": 1.63,
"step": 26800
},
{
"loss": 0.0572,
"grad_norm": 0.8198044896125793,
"learning_rate": 7.380894506562957e-06,
"epoch": 1.63,
"step": 26900
},
{
"loss": 0.0562,
"grad_norm": 0.7912253141403198,
"learning_rate": 7.259358288770054e-06,
"epoch": 1.64,
"step": 27000
},
{
"loss": 0.0567,
"grad_norm": 0.9015645980834961,
"learning_rate": 7.137822070977152e-06,
"epoch": 1.64,
"step": 27100
},
{
"loss": 0.0551,
"grad_norm": 0.6205886602401733,
"learning_rate": 7.0162858531842495e-06,
"epoch": 1.65,
"step": 27200
},
{
"loss": 0.0581,
"grad_norm": 0.8834924697875977,
"learning_rate": 6.894749635391347e-06,
"epoch": 1.66,
"step": 27300
},
{
"loss": 0.0565,
"grad_norm": 0.7698688507080078,
"learning_rate": 6.773213417598445e-06,
"epoch": 1.66,
"step": 27400
},
{
"loss": 0.0575,
"grad_norm": 0.8447450399398804,
"learning_rate": 6.651677199805543e-06,
"epoch": 1.67,
"step": 27500
},
{
"loss": 0.057,
"grad_norm": 1.6002224683761597,
"learning_rate": 6.5301409820126404e-06,
"epoch": 1.67,
"step": 27600
},
{
"loss": 0.0558,
"grad_norm": 0.8625892996788025,
"learning_rate": 6.408604764219738e-06,
"epoch": 1.68,
"step": 27700
},
{
"loss": 0.0566,
"grad_norm": 0.7483322024345398,
"learning_rate": 6.2870685464268355e-06,
"epoch": 1.69,
"step": 27800
},
{
"loss": 0.0571,
"grad_norm": 0.781535804271698,
"learning_rate": 6.165532328633933e-06,
"epoch": 1.69,
"step": 27900
},
{
"loss": 0.0563,
"grad_norm": 0.8761783838272095,
"learning_rate": 6.0439961108410314e-06,
"epoch": 1.7,
"step": 28000
},
{
"loss": 0.0565,
"grad_norm": 0.5183244943618774,
"learning_rate": 5.922459893048129e-06,
"epoch": 1.7,
"step": 28100
},
{
"loss": 0.0564,
"grad_norm": 0.7939796447753906,
"learning_rate": 5.8009236752552265e-06,
"epoch": 1.71,
"step": 28200
},
{
"loss": 0.0576,
"grad_norm": 0.7260966300964355,
"learning_rate": 5.679387457462324e-06,
"epoch": 1.72,
"step": 28300
},
{
"loss": 0.0569,
"grad_norm": 0.9087544083595276,
"learning_rate": 5.557851239669422e-06,
"epoch": 1.72,
"step": 28400
},
{
"loss": 0.056,
"grad_norm": 0.7275218367576599,
"learning_rate": 5.436315021876519e-06,
"epoch": 1.73,
"step": 28500
},
{
"loss": 0.0563,
"grad_norm": 0.5983753800392151,
"learning_rate": 5.315994166261547e-06,
"epoch": 1.74,
"step": 28600
},
{
"loss": 0.0564,
"grad_norm": 0.912756085395813,
"learning_rate": 5.194457948468644e-06,
"epoch": 1.74,
"step": 28700
},
{
"loss": 0.0555,
"grad_norm": 0.6085710525512695,
"learning_rate": 5.072921730675742e-06,
"epoch": 1.75,
"step": 28800
},
{
"loss": 0.0571,
"grad_norm": 0.6775307655334473,
"learning_rate": 4.95138551288284e-06,
"epoch": 1.75,
"step": 28900
},
{
"loss": 0.0543,
"grad_norm": 0.7438898682594299,
"learning_rate": 4.829849295089938e-06,
"epoch": 1.76,
"step": 29000
},
{
"loss": 0.0567,
"grad_norm": 0.719668984413147,
"learning_rate": 4.708313077297035e-06,
"epoch": 1.77,
"step": 29100
},
{
"loss": 0.0565,
"grad_norm": 0.8647979497909546,
"learning_rate": 4.586776859504133e-06,
"epoch": 1.77,
"step": 29200
},
{
"loss": 0.057,
"grad_norm": 0.8238335847854614,
"learning_rate": 4.46524064171123e-06,
"epoch": 1.78,
"step": 29300
},
{
"loss": 0.0563,
"grad_norm": 3.2504589557647705,
"learning_rate": 4.343704423918328e-06,
"epoch": 1.78,
"step": 29400
},
{
"loss": 0.0536,
"grad_norm": 0.7106683850288391,
"learning_rate": 4.222168206125426e-06,
"epoch": 1.79,
"step": 29500
},
{
"loss": 0.056,
"grad_norm": 0.9477577209472656,
"learning_rate": 4.100631988332524e-06,
"epoch": 1.8,
"step": 29600
},
{
"loss": 0.0562,
"grad_norm": 0.8888897895812988,
"learning_rate": 3.979095770539621e-06,
"epoch": 1.8,
"step": 29700
},
{
"loss": 0.0562,
"grad_norm": 0.7125309705734253,
"learning_rate": 3.857559552746719e-06,
"epoch": 1.81,
"step": 29800
},
{
"loss": 0.0552,
"grad_norm": 0.7241693139076233,
"learning_rate": 3.7360233349538167e-06,
"epoch": 1.81,
"step": 29900
},
{
"loss": 0.0556,
"grad_norm": 0.9381842613220215,
"learning_rate": 3.6144871171609143e-06,
"epoch": 1.82,
"step": 30000
},
{
"loss": 0.0551,
"grad_norm": 0.6808192133903503,
"learning_rate": 3.492950899368012e-06,
"epoch": 1.83,
"step": 30100
},
{
"loss": 0.0561,
"grad_norm": 0.6042631268501282,
"learning_rate": 3.3714146815751098e-06,
"epoch": 1.83,
"step": 30200
},
{
"loss": 0.0553,
"grad_norm": 0.5585273504257202,
"learning_rate": 3.2498784637822073e-06,
"epoch": 1.84,
"step": 30300
},
{
"loss": 0.0545,
"grad_norm": 0.9048868417739868,
"learning_rate": 3.128342245989305e-06,
"epoch": 1.84,
"step": 30400
},
{
"loss": 0.0557,
"grad_norm": 0.8429957628250122,
"learning_rate": 3.006806028196403e-06,
"epoch": 1.85,
"step": 30500
},
{
"loss": 0.0563,
"grad_norm": 0.7962875962257385,
"learning_rate": 2.8852698104035003e-06,
"epoch": 1.86,
"step": 30600
},
{
"loss": 0.0559,
"grad_norm": 0.7854676246643066,
"learning_rate": 2.763733592610598e-06,
"epoch": 1.86,
"step": 30700
},
{
"loss": 0.0561,
"grad_norm": 1.694869041442871,
"learning_rate": 2.642197374817696e-06,
"epoch": 1.87,
"step": 30800
},
{
"loss": 0.0568,
"grad_norm": 0.6683087944984436,
"learning_rate": 2.5206611570247934e-06,
"epoch": 1.87,
"step": 30900
},
{
"loss": 0.0548,
"grad_norm": 0.5675504803657532,
"learning_rate": 2.3991249392318913e-06,
"epoch": 1.88,
"step": 31000
},
{
"loss": 0.0552,
"grad_norm": 0.9730797410011292,
"learning_rate": 2.2775887214389893e-06,
"epoch": 1.89,
"step": 31100
},
{
"loss": 0.0568,
"grad_norm": 0.8015105128288269,
"learning_rate": 2.156052503646087e-06,
"epoch": 1.89,
"step": 31200
},
{
"loss": 0.0552,
"grad_norm": 0.5437925457954407,
"learning_rate": 2.0345162858531844e-06,
"epoch": 1.9,
"step": 31300
},
{
"loss": 0.0558,
"grad_norm": 0.8105918765068054,
"learning_rate": 1.9129800680602823e-06,
"epoch": 1.91,
"step": 31400
},
{
"loss": 0.0567,
"grad_norm": 0.8699814677238464,
"learning_rate": 1.7914438502673799e-06,
"epoch": 1.91,
"step": 31500
},
{
"loss": 0.0556,
"grad_norm": 0.542261004447937,
"learning_rate": 1.6699076324744776e-06,
"epoch": 1.92,
"step": 31600
},
{
"loss": 0.0553,
"grad_norm": 0.6852170825004578,
"learning_rate": 1.5483714146815754e-06,
"epoch": 1.92,
"step": 31700
},
{
"loss": 0.0559,
"grad_norm": 0.8324136137962341,
"learning_rate": 1.426835196888673e-06,
"epoch": 1.93,
"step": 31800
},
{
"loss": 0.0539,
"grad_norm": 0.5395376086235046,
"learning_rate": 1.3052989790957707e-06,
"epoch": 1.94,
"step": 31900
},
{
"loss": 0.0557,
"grad_norm": 1.0665556192398071,
"learning_rate": 1.1837627613028684e-06,
"epoch": 1.94,
"step": 32000
},
{
"loss": 0.0556,
"grad_norm": 0.5730076432228088,
"learning_rate": 1.062226543509966e-06,
"epoch": 1.95,
"step": 32100
},
{
"loss": 0.0566,
"grad_norm": 0.8526155352592468,
"learning_rate": 9.406903257170638e-07,
"epoch": 1.95,
"step": 32200
},
{
"loss": 0.0554,
"grad_norm": 0.47227638959884644,
"learning_rate": 8.191541079241614e-07,
"epoch": 1.96,
"step": 32300
},
{
"loss": 0.0559,
"grad_norm": 0.5771980881690979,
"learning_rate": 6.976178901312592e-07,
"epoch": 1.97,
"step": 32400
},
{
"loss": 0.0553,
"grad_norm": 0.7183811068534851,
"learning_rate": 5.772970345162859e-07,
"epoch": 1.97,
"step": 32500
},
{
"loss": 0.0556,
"grad_norm": 0.7808952927589417,
"learning_rate": 4.557608167233836e-07,
"epoch": 1.98,
"step": 32600
},
{
"loss": 0.0549,
"grad_norm": 0.7201197743415833,
"learning_rate": 3.3422459893048135e-07,
"epoch": 1.98,
"step": 32700
},
{
"loss": 0.0546,
"grad_norm": 0.822515606880188,
"learning_rate": 2.1268838113757902e-07,
"epoch": 1.99,
"step": 32800
},
{
"loss": 0.0556,
"grad_norm": 0.6968460083007812,
"learning_rate": 9.115216334467672e-08,
"epoch": 2.0,
"step": 32900
},
{
"eval_loss": 0.06514331698417664,
"eval_f1": 0.9055283859012663,
"eval_precision": 0.9128121708644065,
"eval_recall": 0.898553824781504,
"eval_accuracy": 0.9750088848296079,
"eval_runtime": 304.326,
"eval_samples_per_second": 86.841,
"eval_steps_per_second": 10.857,
"epoch": 2.0,
"step": 32962
},
{
"train_runtime": 12949.9436,
"train_samples_per_second": 20.363,
"train_steps_per_second": 2.545,
"total_flos": 2.448996403000443e+17,
"train_loss": 0.07225221031304233,
"epoch": 2.0,
"step": 32962
}
]
}
}
}