|
{ |
|
"tests": { |
|
"22": { |
|
"id": 22, |
|
"task": 2, |
|
"model": "xlm-roberta-large", |
|
"languages": [ |
|
"cy" |
|
], |
|
"augmentation": [ |
|
"" |
|
], |
|
"data_percentage": 1, |
|
"use_token_type_ids": false, |
|
"tokenizer_config": { |
|
"strip_accents": false, |
|
"add_prefix_space": true |
|
}, |
|
"optimizer_config": { |
|
"adafactor": true, |
|
"num_train_epochs": 2 |
|
}, |
|
"result": [ |
|
{ |
|
"loss": 1.882, |
|
"grad_norm": null, |
|
"learning_rate": 0.0, |
|
"epoch": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"loss": 0.5285, |
|
"grad_norm": 2.4953722953796387, |
|
"learning_rate": 3.99453087019932e-05, |
|
"epoch": 0.01, |
|
"step": 100 |
|
}, |
|
{ |
|
"loss": 0.1702, |
|
"grad_norm": 1.8322360515594482, |
|
"learning_rate": 3.982377248420029e-05, |
|
"epoch": 0.01, |
|
"step": 200 |
|
}, |
|
{ |
|
"loss": 0.1472, |
|
"grad_norm": 1.7121275663375854, |
|
"learning_rate": 3.970223626640739e-05, |
|
"epoch": 0.02, |
|
"step": 300 |
|
}, |
|
{ |
|
"loss": 0.1342, |
|
"grad_norm": 1.7097556591033936, |
|
"learning_rate": 3.958070004861449e-05, |
|
"epoch": 0.02, |
|
"step": 400 |
|
}, |
|
{ |
|
"loss": 0.1288, |
|
"grad_norm": 1.57424795627594, |
|
"learning_rate": 3.9459163830821586e-05, |
|
"epoch": 0.03, |
|
"step": 500 |
|
}, |
|
{ |
|
"loss": 0.1247, |
|
"grad_norm": 1.7552311420440674, |
|
"learning_rate": 3.9337627613028686e-05, |
|
"epoch": 0.04, |
|
"step": 600 |
|
}, |
|
{ |
|
"loss": 0.1218, |
|
"grad_norm": 1.6224812269210815, |
|
"learning_rate": 3.9216091395235786e-05, |
|
"epoch": 0.04, |
|
"step": 700 |
|
}, |
|
{ |
|
"loss": 0.1176, |
|
"grad_norm": 1.8368713855743408, |
|
"learning_rate": 3.909455517744288e-05, |
|
"epoch": 0.05, |
|
"step": 800 |
|
}, |
|
{ |
|
"loss": 0.1119, |
|
"grad_norm": 1.4631482362747192, |
|
"learning_rate": 3.897301895964998e-05, |
|
"epoch": 0.05, |
|
"step": 900 |
|
}, |
|
{ |
|
"loss": 0.1098, |
|
"grad_norm": 1.2774118185043335, |
|
"learning_rate": 3.885148274185708e-05, |
|
"epoch": 0.06, |
|
"step": 1000 |
|
}, |
|
{ |
|
"loss": 0.1083, |
|
"grad_norm": 1.187245488166809, |
|
"learning_rate": 3.872994652406417e-05, |
|
"epoch": 0.07, |
|
"step": 1100 |
|
}, |
|
{ |
|
"loss": 0.1075, |
|
"grad_norm": 1.6492900848388672, |
|
"learning_rate": 3.860841030627127e-05, |
|
"epoch": 0.07, |
|
"step": 1200 |
|
}, |
|
{ |
|
"loss": 0.107, |
|
"grad_norm": 1.4514034986495972, |
|
"learning_rate": 3.8486874088478366e-05, |
|
"epoch": 0.08, |
|
"step": 1300 |
|
}, |
|
{ |
|
"loss": 0.1036, |
|
"grad_norm": 1.0488823652267456, |
|
"learning_rate": 3.8365337870685466e-05, |
|
"epoch": 0.08, |
|
"step": 1400 |
|
}, |
|
{ |
|
"loss": 0.1021, |
|
"grad_norm": 1.5489355325698853, |
|
"learning_rate": 3.8243801652892566e-05, |
|
"epoch": 0.09, |
|
"step": 1500 |
|
}, |
|
{ |
|
"loss": 0.1008, |
|
"grad_norm": 1.2730894088745117, |
|
"learning_rate": 3.812226543509966e-05, |
|
"epoch": 0.1, |
|
"step": 1600 |
|
}, |
|
{ |
|
"loss": 0.1004, |
|
"grad_norm": 1.6920459270477295, |
|
"learning_rate": 3.800072921730676e-05, |
|
"epoch": 0.1, |
|
"step": 1700 |
|
}, |
|
{ |
|
"loss": 0.1006, |
|
"grad_norm": 0.9863981008529663, |
|
"learning_rate": 3.787919299951386e-05, |
|
"epoch": 0.11, |
|
"step": 1800 |
|
}, |
|
{ |
|
"loss": 0.0982, |
|
"grad_norm": 0.9981995820999146, |
|
"learning_rate": 3.775765678172095e-05, |
|
"epoch": 0.12, |
|
"step": 1900 |
|
}, |
|
{ |
|
"loss": 0.0975, |
|
"grad_norm": 1.021620273590088, |
|
"learning_rate": 3.763612056392805e-05, |
|
"epoch": 0.12, |
|
"step": 2000 |
|
}, |
|
{ |
|
"loss": 0.0989, |
|
"grad_norm": 1.2811397314071655, |
|
"learning_rate": 3.751458434613515e-05, |
|
"epoch": 0.13, |
|
"step": 2100 |
|
}, |
|
{ |
|
"loss": 0.0959, |
|
"grad_norm": 1.5976190567016602, |
|
"learning_rate": 3.7393048128342246e-05, |
|
"epoch": 0.13, |
|
"step": 2200 |
|
}, |
|
{ |
|
"loss": 0.0961, |
|
"grad_norm": 0.9754481911659241, |
|
"learning_rate": 3.7271511910549346e-05, |
|
"epoch": 0.14, |
|
"step": 2300 |
|
}, |
|
{ |
|
"loss": 0.0956, |
|
"grad_norm": 0.9418678283691406, |
|
"learning_rate": 3.7149975692756447e-05, |
|
"epoch": 0.15, |
|
"step": 2400 |
|
}, |
|
{ |
|
"loss": 0.0954, |
|
"grad_norm": 1.294745922088623, |
|
"learning_rate": 3.702843947496354e-05, |
|
"epoch": 0.15, |
|
"step": 2500 |
|
}, |
|
{ |
|
"loss": 0.0943, |
|
"grad_norm": 1.3049461841583252, |
|
"learning_rate": 3.690690325717064e-05, |
|
"epoch": 0.16, |
|
"step": 2600 |
|
}, |
|
{ |
|
"loss": 0.0936, |
|
"grad_norm": 1.1144427061080933, |
|
"learning_rate": 3.678536703937774e-05, |
|
"epoch": 0.16, |
|
"step": 2700 |
|
}, |
|
{ |
|
"loss": 0.0939, |
|
"grad_norm": 1.3424856662750244, |
|
"learning_rate": 3.666383082158483e-05, |
|
"epoch": 0.17, |
|
"step": 2800 |
|
}, |
|
{ |
|
"loss": 0.0947, |
|
"grad_norm": 1.123299241065979, |
|
"learning_rate": 3.6542294603791933e-05, |
|
"epoch": 0.18, |
|
"step": 2900 |
|
}, |
|
{ |
|
"loss": 0.0932, |
|
"grad_norm": 1.456009864807129, |
|
"learning_rate": 3.642075838599903e-05, |
|
"epoch": 0.18, |
|
"step": 3000 |
|
}, |
|
{ |
|
"loss": 0.0927, |
|
"grad_norm": 1.4363266229629517, |
|
"learning_rate": 3.629922216820613e-05, |
|
"epoch": 0.19, |
|
"step": 3100 |
|
}, |
|
{ |
|
"loss": 0.0907, |
|
"grad_norm": 0.7776892185211182, |
|
"learning_rate": 3.617768595041323e-05, |
|
"epoch": 0.19, |
|
"step": 3200 |
|
}, |
|
{ |
|
"loss": 0.092, |
|
"grad_norm": 25.731966018676758, |
|
"learning_rate": 3.605614973262032e-05, |
|
"epoch": 0.2, |
|
"step": 3300 |
|
}, |
|
{ |
|
"loss": 0.091, |
|
"grad_norm": 0.9259088039398193, |
|
"learning_rate": 3.593461351482742e-05, |
|
"epoch": 0.21, |
|
"step": 3400 |
|
}, |
|
{ |
|
"loss": 0.0915, |
|
"grad_norm": 0.851094663143158, |
|
"learning_rate": 3.581307729703452e-05, |
|
"epoch": 0.21, |
|
"step": 3500 |
|
}, |
|
{ |
|
"loss": 0.0902, |
|
"grad_norm": 1.5700650215148926, |
|
"learning_rate": 3.5691541079241614e-05, |
|
"epoch": 0.22, |
|
"step": 3600 |
|
}, |
|
{ |
|
"loss": 0.0888, |
|
"grad_norm": 1.13387930393219, |
|
"learning_rate": 3.5570004861448714e-05, |
|
"epoch": 0.22, |
|
"step": 3700 |
|
}, |
|
{ |
|
"loss": 0.089, |
|
"grad_norm": 1.2357937097549438, |
|
"learning_rate": 3.5448468643655814e-05, |
|
"epoch": 0.23, |
|
"step": 3800 |
|
}, |
|
{ |
|
"loss": 0.0898, |
|
"grad_norm": 0.9063655734062195, |
|
"learning_rate": 3.532693242586291e-05, |
|
"epoch": 0.24, |
|
"step": 3900 |
|
}, |
|
{ |
|
"loss": 0.0893, |
|
"grad_norm": 1.1259723901748657, |
|
"learning_rate": 3.520539620807001e-05, |
|
"epoch": 0.24, |
|
"step": 4000 |
|
}, |
|
{ |
|
"loss": 0.0889, |
|
"grad_norm": 0.8327601552009583, |
|
"learning_rate": 3.508385999027711e-05, |
|
"epoch": 0.25, |
|
"step": 4100 |
|
}, |
|
{ |
|
"loss": 0.0862, |
|
"grad_norm": 1.2368316650390625, |
|
"learning_rate": 3.49623237724842e-05, |
|
"epoch": 0.25, |
|
"step": 4200 |
|
}, |
|
{ |
|
"loss": 0.0867, |
|
"grad_norm": 1.1474043130874634, |
|
"learning_rate": 3.48407875546913e-05, |
|
"epoch": 0.26, |
|
"step": 4300 |
|
}, |
|
{ |
|
"loss": 0.0858, |
|
"grad_norm": 0.6887868046760559, |
|
"learning_rate": 3.47192513368984e-05, |
|
"epoch": 0.27, |
|
"step": 4400 |
|
}, |
|
{ |
|
"loss": 0.0877, |
|
"grad_norm": 0.8170347809791565, |
|
"learning_rate": 3.4597715119105494e-05, |
|
"epoch": 0.27, |
|
"step": 4500 |
|
}, |
|
{ |
|
"loss": 0.0871, |
|
"grad_norm": 0.7361243367195129, |
|
"learning_rate": 3.4476178901312594e-05, |
|
"epoch": 0.28, |
|
"step": 4600 |
|
}, |
|
{ |
|
"loss": 0.0878, |
|
"grad_norm": 1.0975162982940674, |
|
"learning_rate": 3.435464268351969e-05, |
|
"epoch": 0.29, |
|
"step": 4700 |
|
}, |
|
{ |
|
"loss": 0.0863, |
|
"grad_norm": 0.931176483631134, |
|
"learning_rate": 3.4233106465726794e-05, |
|
"epoch": 0.29, |
|
"step": 4800 |
|
}, |
|
{ |
|
"loss": 0.0853, |
|
"grad_norm": 1.0259523391723633, |
|
"learning_rate": 3.411157024793389e-05, |
|
"epoch": 0.3, |
|
"step": 4900 |
|
}, |
|
{ |
|
"loss": 0.0876, |
|
"grad_norm": 1.1680504083633423, |
|
"learning_rate": 3.399003403014098e-05, |
|
"epoch": 0.3, |
|
"step": 5000 |
|
}, |
|
{ |
|
"loss": 0.0855, |
|
"grad_norm": 1.2358198165893555, |
|
"learning_rate": 3.386849781234809e-05, |
|
"epoch": 0.31, |
|
"step": 5100 |
|
}, |
|
{ |
|
"loss": 0.085, |
|
"grad_norm": 0.8484376668930054, |
|
"learning_rate": 3.374696159455518e-05, |
|
"epoch": 0.32, |
|
"step": 5200 |
|
}, |
|
{ |
|
"loss": 0.085, |
|
"grad_norm": 1.5419291257858276, |
|
"learning_rate": 3.3625425376762274e-05, |
|
"epoch": 0.32, |
|
"step": 5300 |
|
}, |
|
{ |
|
"loss": 0.0849, |
|
"grad_norm": 1.0334900617599487, |
|
"learning_rate": 3.3503889158969374e-05, |
|
"epoch": 0.33, |
|
"step": 5400 |
|
}, |
|
{ |
|
"loss": 0.0854, |
|
"grad_norm": 1.0367408990859985, |
|
"learning_rate": 3.3382352941176474e-05, |
|
"epoch": 0.33, |
|
"step": 5500 |
|
}, |
|
{ |
|
"loss": 0.0853, |
|
"grad_norm": 0.8429509401321411, |
|
"learning_rate": 3.326081672338357e-05, |
|
"epoch": 0.34, |
|
"step": 5600 |
|
}, |
|
{ |
|
"loss": 0.086, |
|
"grad_norm": 0.9059005379676819, |
|
"learning_rate": 3.313928050559067e-05, |
|
"epoch": 0.35, |
|
"step": 5700 |
|
}, |
|
{ |
|
"loss": 0.0846, |
|
"grad_norm": 1.1803362369537354, |
|
"learning_rate": 3.301774428779777e-05, |
|
"epoch": 0.35, |
|
"step": 5800 |
|
}, |
|
{ |
|
"loss": 0.0817, |
|
"grad_norm": 0.7263641357421875, |
|
"learning_rate": 3.289620807000487e-05, |
|
"epoch": 0.36, |
|
"step": 5900 |
|
}, |
|
{ |
|
"loss": 0.0831, |
|
"grad_norm": 0.8227238655090332, |
|
"learning_rate": 3.277467185221196e-05, |
|
"epoch": 0.36, |
|
"step": 6000 |
|
}, |
|
{ |
|
"loss": 0.0839, |
|
"grad_norm": 1.0349544286727905, |
|
"learning_rate": 3.2653135634419055e-05, |
|
"epoch": 0.37, |
|
"step": 6100 |
|
}, |
|
{ |
|
"loss": 0.0827, |
|
"grad_norm": 0.8446714282035828, |
|
"learning_rate": 3.253159941662616e-05, |
|
"epoch": 0.38, |
|
"step": 6200 |
|
}, |
|
{ |
|
"loss": 0.082, |
|
"grad_norm": 1.1419836282730103, |
|
"learning_rate": 3.2410063198833255e-05, |
|
"epoch": 0.38, |
|
"step": 6300 |
|
}, |
|
{ |
|
"loss": 0.0812, |
|
"grad_norm": 0.9505990147590637, |
|
"learning_rate": 3.228852698104035e-05, |
|
"epoch": 0.39, |
|
"step": 6400 |
|
}, |
|
{ |
|
"loss": 0.0806, |
|
"grad_norm": 1.0036993026733398, |
|
"learning_rate": 3.2166990763247455e-05, |
|
"epoch": 0.39, |
|
"step": 6500 |
|
}, |
|
{ |
|
"loss": 0.0819, |
|
"grad_norm": 0.7694116234779358, |
|
"learning_rate": 3.204545454545455e-05, |
|
"epoch": 0.4, |
|
"step": 6600 |
|
}, |
|
{ |
|
"loss": 0.0818, |
|
"grad_norm": 0.7389699220657349, |
|
"learning_rate": 3.192391832766165e-05, |
|
"epoch": 0.41, |
|
"step": 6700 |
|
}, |
|
{ |
|
"loss": 0.0829, |
|
"grad_norm": 0.8264873623847961, |
|
"learning_rate": 3.180238210986874e-05, |
|
"epoch": 0.41, |
|
"step": 6800 |
|
}, |
|
{ |
|
"loss": 0.0849, |
|
"grad_norm": 0.8844084143638611, |
|
"learning_rate": 3.168084589207584e-05, |
|
"epoch": 0.42, |
|
"step": 6900 |
|
}, |
|
{ |
|
"loss": 0.0816, |
|
"grad_norm": 0.8728023171424866, |
|
"learning_rate": 3.155930967428294e-05, |
|
"epoch": 0.42, |
|
"step": 7000 |
|
}, |
|
{ |
|
"loss": 0.0799, |
|
"grad_norm": 1.218404769897461, |
|
"learning_rate": 3.1437773456490035e-05, |
|
"epoch": 0.43, |
|
"step": 7100 |
|
}, |
|
{ |
|
"loss": 0.0797, |
|
"grad_norm": 0.7085688710212708, |
|
"learning_rate": 3.1316237238697135e-05, |
|
"epoch": 0.44, |
|
"step": 7200 |
|
}, |
|
{ |
|
"loss": 0.0795, |
|
"grad_norm": 0.8446517586708069, |
|
"learning_rate": 3.1194701020904235e-05, |
|
"epoch": 0.44, |
|
"step": 7300 |
|
}, |
|
{ |
|
"loss": 0.0817, |
|
"grad_norm": 1.3226453065872192, |
|
"learning_rate": 3.107316480311133e-05, |
|
"epoch": 0.45, |
|
"step": 7400 |
|
}, |
|
{ |
|
"loss": 0.0816, |
|
"grad_norm": 0.7685155868530273, |
|
"learning_rate": 3.095162858531843e-05, |
|
"epoch": 0.46, |
|
"step": 7500 |
|
}, |
|
{ |
|
"loss": 0.0806, |
|
"grad_norm": 0.7135798335075378, |
|
"learning_rate": 3.083009236752553e-05, |
|
"epoch": 0.46, |
|
"step": 7600 |
|
}, |
|
{ |
|
"loss": 0.0795, |
|
"grad_norm": 1.0276037454605103, |
|
"learning_rate": 3.070855614973262e-05, |
|
"epoch": 0.47, |
|
"step": 7700 |
|
}, |
|
{ |
|
"loss": 0.081, |
|
"grad_norm": 1.1788092851638794, |
|
"learning_rate": 3.058701993193972e-05, |
|
"epoch": 0.47, |
|
"step": 7800 |
|
}, |
|
{ |
|
"loss": 0.0791, |
|
"grad_norm": 1.0305782556533813, |
|
"learning_rate": 3.046548371414682e-05, |
|
"epoch": 0.48, |
|
"step": 7900 |
|
}, |
|
{ |
|
"loss": 0.0805, |
|
"grad_norm": 1.4414223432540894, |
|
"learning_rate": 3.0343947496353915e-05, |
|
"epoch": 0.49, |
|
"step": 8000 |
|
}, |
|
{ |
|
"loss": 0.0799, |
|
"grad_norm": 0.8137165904045105, |
|
"learning_rate": 3.0222411278561012e-05, |
|
"epoch": 0.49, |
|
"step": 8100 |
|
}, |
|
{ |
|
"loss": 0.08, |
|
"grad_norm": 1.1238079071044922, |
|
"learning_rate": 3.0100875060768112e-05, |
|
"epoch": 0.5, |
|
"step": 8200 |
|
}, |
|
{ |
|
"loss": 0.0792, |
|
"grad_norm": 0.9724037647247314, |
|
"learning_rate": 2.997933884297521e-05, |
|
"epoch": 0.5, |
|
"step": 8300 |
|
}, |
|
{ |
|
"loss": 0.0793, |
|
"grad_norm": 1.0247116088867188, |
|
"learning_rate": 2.9857802625182306e-05, |
|
"epoch": 0.51, |
|
"step": 8400 |
|
}, |
|
{ |
|
"loss": 0.0783, |
|
"grad_norm": 1.454062581062317, |
|
"learning_rate": 2.9737481769567335e-05, |
|
"epoch": 0.52, |
|
"step": 8500 |
|
}, |
|
{ |
|
"loss": 0.0788, |
|
"grad_norm": 0.7570217251777649, |
|
"learning_rate": 2.961594555177443e-05, |
|
"epoch": 0.52, |
|
"step": 8600 |
|
}, |
|
{ |
|
"loss": 0.0768, |
|
"grad_norm": 1.1738083362579346, |
|
"learning_rate": 2.9494409333981528e-05, |
|
"epoch": 0.53, |
|
"step": 8700 |
|
}, |
|
{ |
|
"loss": 0.0778, |
|
"grad_norm": 0.7776427268981934, |
|
"learning_rate": 2.9372873116188625e-05, |
|
"epoch": 0.53, |
|
"step": 8800 |
|
}, |
|
{ |
|
"loss": 0.0763, |
|
"grad_norm": 1.226198673248291, |
|
"learning_rate": 2.9251336898395725e-05, |
|
"epoch": 0.54, |
|
"step": 8900 |
|
}, |
|
{ |
|
"loss": 0.0761, |
|
"grad_norm": 0.8859773874282837, |
|
"learning_rate": 2.912980068060282e-05, |
|
"epoch": 0.55, |
|
"step": 9000 |
|
}, |
|
{ |
|
"loss": 0.0765, |
|
"grad_norm": 1.0220259428024292, |
|
"learning_rate": 2.9008264462809918e-05, |
|
"epoch": 0.55, |
|
"step": 9100 |
|
}, |
|
{ |
|
"loss": 0.0777, |
|
"grad_norm": 1.0430243015289307, |
|
"learning_rate": 2.888672824501702e-05, |
|
"epoch": 0.56, |
|
"step": 9200 |
|
}, |
|
{ |
|
"loss": 0.0775, |
|
"grad_norm": 1.1380356550216675, |
|
"learning_rate": 2.8765192027224115e-05, |
|
"epoch": 0.56, |
|
"step": 9300 |
|
}, |
|
{ |
|
"loss": 0.0775, |
|
"grad_norm": 0.6778531670570374, |
|
"learning_rate": 2.8643655809431212e-05, |
|
"epoch": 0.57, |
|
"step": 9400 |
|
}, |
|
{ |
|
"loss": 0.0782, |
|
"grad_norm": 1.0413175821304321, |
|
"learning_rate": 2.852211959163831e-05, |
|
"epoch": 0.58, |
|
"step": 9500 |
|
}, |
|
{ |
|
"loss": 0.0791, |
|
"grad_norm": 1.1399835348129272, |
|
"learning_rate": 2.840058337384541e-05, |
|
"epoch": 0.58, |
|
"step": 9600 |
|
}, |
|
{ |
|
"loss": 0.0763, |
|
"grad_norm": 0.968399703502655, |
|
"learning_rate": 2.8279047156052505e-05, |
|
"epoch": 0.59, |
|
"step": 9700 |
|
}, |
|
{ |
|
"loss": 0.0763, |
|
"grad_norm": 1.0254497528076172, |
|
"learning_rate": 2.8157510938259602e-05, |
|
"epoch": 0.59, |
|
"step": 9800 |
|
}, |
|
{ |
|
"loss": 0.0771, |
|
"grad_norm": 0.8642473220825195, |
|
"learning_rate": 2.8035974720466702e-05, |
|
"epoch": 0.6, |
|
"step": 9900 |
|
}, |
|
{ |
|
"loss": 0.0772, |
|
"grad_norm": 1.1130231618881226, |
|
"learning_rate": 2.79144385026738e-05, |
|
"epoch": 0.61, |
|
"step": 10000 |
|
}, |
|
{ |
|
"loss": 0.0793, |
|
"grad_norm": 1.4455962181091309, |
|
"learning_rate": 2.7792902284880895e-05, |
|
"epoch": 0.61, |
|
"step": 10100 |
|
}, |
|
{ |
|
"loss": 0.077, |
|
"grad_norm": 0.9273576736450195, |
|
"learning_rate": 2.7671366067087992e-05, |
|
"epoch": 0.62, |
|
"step": 10200 |
|
}, |
|
{ |
|
"loss": 0.0766, |
|
"grad_norm": 0.8223456740379333, |
|
"learning_rate": 2.7549829849295092e-05, |
|
"epoch": 0.62, |
|
"step": 10300 |
|
}, |
|
{ |
|
"loss": 0.0765, |
|
"grad_norm": 1.1068949699401855, |
|
"learning_rate": 2.742829363150219e-05, |
|
"epoch": 0.63, |
|
"step": 10400 |
|
}, |
|
{ |
|
"loss": 0.0762, |
|
"grad_norm": 1.0787135362625122, |
|
"learning_rate": 2.7306757413709285e-05, |
|
"epoch": 0.64, |
|
"step": 10500 |
|
}, |
|
{ |
|
"loss": 0.0765, |
|
"grad_norm": 0.6019480228424072, |
|
"learning_rate": 2.7185221195916386e-05, |
|
"epoch": 0.64, |
|
"step": 10600 |
|
}, |
|
{ |
|
"loss": 0.0756, |
|
"grad_norm": 0.7752580046653748, |
|
"learning_rate": 2.7063684978123482e-05, |
|
"epoch": 0.65, |
|
"step": 10700 |
|
}, |
|
{ |
|
"loss": 0.0762, |
|
"grad_norm": 0.9023341536521912, |
|
"learning_rate": 2.6943364122508508e-05, |
|
"epoch": 0.66, |
|
"step": 10800 |
|
}, |
|
{ |
|
"loss": 0.0759, |
|
"grad_norm": 1.1154266595840454, |
|
"learning_rate": 2.6821827904715608e-05, |
|
"epoch": 0.66, |
|
"step": 10900 |
|
}, |
|
{ |
|
"loss": 0.0752, |
|
"grad_norm": 1.5197564363479614, |
|
"learning_rate": 2.6700291686922705e-05, |
|
"epoch": 0.67, |
|
"step": 11000 |
|
}, |
|
{ |
|
"loss": 0.0757, |
|
"grad_norm": 0.8111494183540344, |
|
"learning_rate": 2.65787554691298e-05, |
|
"epoch": 0.67, |
|
"step": 11100 |
|
}, |
|
{ |
|
"loss": 0.0749, |
|
"grad_norm": 0.6413083076477051, |
|
"learning_rate": 2.6457219251336898e-05, |
|
"epoch": 0.68, |
|
"step": 11200 |
|
}, |
|
{ |
|
"loss": 0.0754, |
|
"grad_norm": 0.8996323943138123, |
|
"learning_rate": 2.6335683033544e-05, |
|
"epoch": 0.69, |
|
"step": 11300 |
|
}, |
|
{ |
|
"loss": 0.0744, |
|
"grad_norm": 0.7931196093559265, |
|
"learning_rate": 2.6214146815751095e-05, |
|
"epoch": 0.69, |
|
"step": 11400 |
|
}, |
|
{ |
|
"loss": 0.0742, |
|
"grad_norm": 1.0821586847305298, |
|
"learning_rate": 2.609261059795819e-05, |
|
"epoch": 0.7, |
|
"step": 11500 |
|
}, |
|
{ |
|
"loss": 0.0722, |
|
"grad_norm": 0.9964590072631836, |
|
"learning_rate": 2.5971074380165292e-05, |
|
"epoch": 0.7, |
|
"step": 11600 |
|
}, |
|
{ |
|
"loss": 0.0752, |
|
"grad_norm": 0.7918893694877625, |
|
"learning_rate": 2.584953816237239e-05, |
|
"epoch": 0.71, |
|
"step": 11700 |
|
}, |
|
{ |
|
"loss": 0.0734, |
|
"grad_norm": 0.6565855145454407, |
|
"learning_rate": 2.5728001944579485e-05, |
|
"epoch": 0.72, |
|
"step": 11800 |
|
}, |
|
{ |
|
"loss": 0.0717, |
|
"grad_norm": 1.9885566234588623, |
|
"learning_rate": 2.5606465726786582e-05, |
|
"epoch": 0.72, |
|
"step": 11900 |
|
}, |
|
{ |
|
"loss": 0.0747, |
|
"grad_norm": 0.6101750135421753, |
|
"learning_rate": 2.5484929508993682e-05, |
|
"epoch": 0.73, |
|
"step": 12000 |
|
}, |
|
{ |
|
"loss": 0.073, |
|
"grad_norm": 1.001930594444275, |
|
"learning_rate": 2.536339329120078e-05, |
|
"epoch": 0.73, |
|
"step": 12100 |
|
}, |
|
{ |
|
"loss": 0.074, |
|
"grad_norm": 0.880673348903656, |
|
"learning_rate": 2.5241857073407875e-05, |
|
"epoch": 0.74, |
|
"step": 12200 |
|
}, |
|
{ |
|
"loss": 0.0738, |
|
"grad_norm": 0.7980429530143738, |
|
"learning_rate": 2.5120320855614975e-05, |
|
"epoch": 0.75, |
|
"step": 12300 |
|
}, |
|
{ |
|
"loss": 0.0758, |
|
"grad_norm": 1.0153135061264038, |
|
"learning_rate": 2.4998784637822072e-05, |
|
"epoch": 0.75, |
|
"step": 12400 |
|
}, |
|
{ |
|
"loss": 0.0742, |
|
"grad_norm": 0.8344822525978088, |
|
"learning_rate": 2.487724842002917e-05, |
|
"epoch": 0.76, |
|
"step": 12500 |
|
}, |
|
{ |
|
"loss": 0.0738, |
|
"grad_norm": 0.6752304434776306, |
|
"learning_rate": 2.4755712202236272e-05, |
|
"epoch": 0.76, |
|
"step": 12600 |
|
}, |
|
{ |
|
"loss": 0.0732, |
|
"grad_norm": 1.1106210947036743, |
|
"learning_rate": 2.4634175984443366e-05, |
|
"epoch": 0.77, |
|
"step": 12700 |
|
}, |
|
{ |
|
"loss": 0.0754, |
|
"grad_norm": 0.8022058606147766, |
|
"learning_rate": 2.4512639766650462e-05, |
|
"epoch": 0.78, |
|
"step": 12800 |
|
}, |
|
{ |
|
"loss": 0.0735, |
|
"grad_norm": 0.737308144569397, |
|
"learning_rate": 2.439110354885756e-05, |
|
"epoch": 0.78, |
|
"step": 12900 |
|
}, |
|
{ |
|
"loss": 0.0738, |
|
"grad_norm": 2.094043493270874, |
|
"learning_rate": 2.4269567331064662e-05, |
|
"epoch": 0.79, |
|
"step": 13000 |
|
}, |
|
{ |
|
"loss": 0.072, |
|
"grad_norm": 1.1105279922485352, |
|
"learning_rate": 2.4148031113271756e-05, |
|
"epoch": 0.79, |
|
"step": 13100 |
|
}, |
|
{ |
|
"loss": 0.0716, |
|
"grad_norm": 1.2243571281433105, |
|
"learning_rate": 2.4026494895478852e-05, |
|
"epoch": 0.8, |
|
"step": 13200 |
|
}, |
|
{ |
|
"loss": 0.0718, |
|
"grad_norm": 1.0883300304412842, |
|
"learning_rate": 2.3904958677685956e-05, |
|
"epoch": 0.81, |
|
"step": 13300 |
|
}, |
|
{ |
|
"loss": 0.0727, |
|
"grad_norm": 0.9934273362159729, |
|
"learning_rate": 2.378342245989305e-05, |
|
"epoch": 0.81, |
|
"step": 13400 |
|
}, |
|
{ |
|
"loss": 0.0721, |
|
"grad_norm": 0.7145100831985474, |
|
"learning_rate": 2.3661886242100146e-05, |
|
"epoch": 0.82, |
|
"step": 13500 |
|
}, |
|
{ |
|
"loss": 0.0721, |
|
"grad_norm": 0.8873516321182251, |
|
"learning_rate": 2.3540350024307243e-05, |
|
"epoch": 0.83, |
|
"step": 13600 |
|
}, |
|
{ |
|
"loss": 0.0723, |
|
"grad_norm": 0.7798359990119934, |
|
"learning_rate": 2.3418813806514346e-05, |
|
"epoch": 0.83, |
|
"step": 13700 |
|
}, |
|
{ |
|
"loss": 0.0726, |
|
"grad_norm": 0.9411553740501404, |
|
"learning_rate": 2.329727758872144e-05, |
|
"epoch": 0.84, |
|
"step": 13800 |
|
}, |
|
{ |
|
"loss": 0.0715, |
|
"grad_norm": 0.7994709610939026, |
|
"learning_rate": 2.3175741370928536e-05, |
|
"epoch": 0.84, |
|
"step": 13900 |
|
}, |
|
{ |
|
"loss": 0.0732, |
|
"grad_norm": 0.5489715337753296, |
|
"learning_rate": 2.305420515313564e-05, |
|
"epoch": 0.85, |
|
"step": 14000 |
|
}, |
|
{ |
|
"loss": 0.0699, |
|
"grad_norm": 0.5710996389389038, |
|
"learning_rate": 2.2932668935342736e-05, |
|
"epoch": 0.86, |
|
"step": 14100 |
|
}, |
|
{ |
|
"loss": 0.073, |
|
"grad_norm": 0.7003745436668396, |
|
"learning_rate": 2.281113271754983e-05, |
|
"epoch": 0.86, |
|
"step": 14200 |
|
}, |
|
{ |
|
"loss": 0.0722, |
|
"grad_norm": 0.6743086576461792, |
|
"learning_rate": 2.2689596499756926e-05, |
|
"epoch": 0.87, |
|
"step": 14300 |
|
}, |
|
{ |
|
"loss": 0.0699, |
|
"grad_norm": 0.6730968356132507, |
|
"learning_rate": 2.256806028196403e-05, |
|
"epoch": 0.87, |
|
"step": 14400 |
|
}, |
|
{ |
|
"loss": 0.0719, |
|
"grad_norm": 0.7155641913414001, |
|
"learning_rate": 2.2446524064171126e-05, |
|
"epoch": 0.88, |
|
"step": 14500 |
|
}, |
|
{ |
|
"loss": 0.0708, |
|
"grad_norm": 0.8122462630271912, |
|
"learning_rate": 2.232498784637822e-05, |
|
"epoch": 0.89, |
|
"step": 14600 |
|
}, |
|
{ |
|
"loss": 0.0718, |
|
"grad_norm": 0.8022533655166626, |
|
"learning_rate": 2.2203451628585323e-05, |
|
"epoch": 0.89, |
|
"step": 14700 |
|
}, |
|
{ |
|
"loss": 0.0712, |
|
"grad_norm": 0.545359194278717, |
|
"learning_rate": 2.208191541079242e-05, |
|
"epoch": 0.9, |
|
"step": 14800 |
|
}, |
|
{ |
|
"loss": 0.0711, |
|
"grad_norm": 0.8318025469779968, |
|
"learning_rate": 2.1960379192999513e-05, |
|
"epoch": 0.9, |
|
"step": 14900 |
|
}, |
|
{ |
|
"loss": 0.0706, |
|
"grad_norm": 0.9334779381752014, |
|
"learning_rate": 2.1838842975206616e-05, |
|
"epoch": 0.91, |
|
"step": 15000 |
|
}, |
|
{ |
|
"loss": 0.0701, |
|
"grad_norm": 0.8202875256538391, |
|
"learning_rate": 2.1717306757413713e-05, |
|
"epoch": 0.92, |
|
"step": 15100 |
|
}, |
|
{ |
|
"loss": 0.07, |
|
"grad_norm": 0.8788963556289673, |
|
"learning_rate": 2.159577053962081e-05, |
|
"epoch": 0.92, |
|
"step": 15200 |
|
}, |
|
{ |
|
"loss": 0.0713, |
|
"grad_norm": 1.023823618888855, |
|
"learning_rate": 2.1474234321827903e-05, |
|
"epoch": 0.93, |
|
"step": 15300 |
|
}, |
|
{ |
|
"loss": 0.0697, |
|
"grad_norm": 0.8784018158912659, |
|
"learning_rate": 2.1353913466212936e-05, |
|
"epoch": 0.93, |
|
"step": 15400 |
|
}, |
|
{ |
|
"loss": 0.0695, |
|
"grad_norm": 1.1254814863204956, |
|
"learning_rate": 2.1232377248420032e-05, |
|
"epoch": 0.94, |
|
"step": 15500 |
|
}, |
|
{ |
|
"loss": 0.0697, |
|
"grad_norm": 0.9760749340057373, |
|
"learning_rate": 2.1110841030627126e-05, |
|
"epoch": 0.95, |
|
"step": 15600 |
|
}, |
|
{ |
|
"loss": 0.0709, |
|
"grad_norm": 1.0121357440948486, |
|
"learning_rate": 2.098930481283423e-05, |
|
"epoch": 0.95, |
|
"step": 15700 |
|
}, |
|
{ |
|
"loss": 0.0717, |
|
"grad_norm": 0.7810111045837402, |
|
"learning_rate": 2.0867768595041326e-05, |
|
"epoch": 0.96, |
|
"step": 15800 |
|
}, |
|
{ |
|
"loss": 0.0692, |
|
"grad_norm": 0.6813214421272278, |
|
"learning_rate": 2.074623237724842e-05, |
|
"epoch": 0.96, |
|
"step": 15900 |
|
}, |
|
{ |
|
"loss": 0.0696, |
|
"grad_norm": 0.7685451507568359, |
|
"learning_rate": 2.0624696159455516e-05, |
|
"epoch": 0.97, |
|
"step": 16000 |
|
}, |
|
{ |
|
"loss": 0.0702, |
|
"grad_norm": 3.3225691318511963, |
|
"learning_rate": 2.050315994166262e-05, |
|
"epoch": 0.98, |
|
"step": 16100 |
|
}, |
|
{ |
|
"loss": 0.0702, |
|
"grad_norm": 0.7979671955108643, |
|
"learning_rate": 2.0381623723869716e-05, |
|
"epoch": 0.98, |
|
"step": 16200 |
|
}, |
|
{ |
|
"loss": 0.0691, |
|
"grad_norm": 3.4929583072662354, |
|
"learning_rate": 2.026008750607681e-05, |
|
"epoch": 0.99, |
|
"step": 16300 |
|
}, |
|
{ |
|
"loss": 0.0703, |
|
"grad_norm": 0.7738245725631714, |
|
"learning_rate": 2.0138551288283913e-05, |
|
"epoch": 1.0, |
|
"step": 16400 |
|
}, |
|
{ |
|
"eval_loss": 0.06881729513406754, |
|
"eval_f1": 0.8973916467400326, |
|
"eval_precision": 0.9049522471305407, |
|
"eval_recall": 0.8906029559155776, |
|
"eval_accuracy": 0.9730252863363563, |
|
"eval_runtime": 304.4852, |
|
"eval_samples_per_second": 86.796, |
|
"eval_steps_per_second": 10.851, |
|
"epoch": 1.0, |
|
"step": 16481 |
|
}, |
|
{ |
|
"loss": 0.0684, |
|
"grad_norm": 0.891858696937561, |
|
"learning_rate": 2.001701507049101e-05, |
|
"epoch": 1.0, |
|
"step": 16500 |
|
}, |
|
{ |
|
"loss": 0.0619, |
|
"grad_norm": 0.6408938765525818, |
|
"learning_rate": 1.9895478852698106e-05, |
|
"epoch": 1.01, |
|
"step": 16600 |
|
}, |
|
{ |
|
"loss": 0.0629, |
|
"grad_norm": 0.7390792965888977, |
|
"learning_rate": 1.9773942634905203e-05, |
|
"epoch": 1.01, |
|
"step": 16700 |
|
}, |
|
{ |
|
"loss": 0.0604, |
|
"grad_norm": 0.5206795930862427, |
|
"learning_rate": 1.9652406417112303e-05, |
|
"epoch": 1.02, |
|
"step": 16800 |
|
}, |
|
{ |
|
"loss": 0.0613, |
|
"grad_norm": 0.909116268157959, |
|
"learning_rate": 1.95308701993194e-05, |
|
"epoch": 1.03, |
|
"step": 16900 |
|
}, |
|
{ |
|
"loss": 0.0616, |
|
"grad_norm": 0.8701964020729065, |
|
"learning_rate": 1.9409333981526496e-05, |
|
"epoch": 1.03, |
|
"step": 17000 |
|
}, |
|
{ |
|
"loss": 0.0625, |
|
"grad_norm": 1.0762407779693604, |
|
"learning_rate": 1.9287797763733593e-05, |
|
"epoch": 1.04, |
|
"step": 17100 |
|
}, |
|
{ |
|
"loss": 0.0615, |
|
"grad_norm": 0.7816362380981445, |
|
"learning_rate": 1.9166261545940693e-05, |
|
"epoch": 1.04, |
|
"step": 17200 |
|
}, |
|
{ |
|
"loss": 0.0626, |
|
"grad_norm": 0.6983965039253235, |
|
"learning_rate": 1.904594069032572e-05, |
|
"epoch": 1.05, |
|
"step": 17300 |
|
}, |
|
{ |
|
"loss": 0.0621, |
|
"grad_norm": 0.910698413848877, |
|
"learning_rate": 1.8924404472532816e-05, |
|
"epoch": 1.06, |
|
"step": 17400 |
|
}, |
|
{ |
|
"loss": 0.0631, |
|
"grad_norm": 0.8654133677482605, |
|
"learning_rate": 1.8802868254739916e-05, |
|
"epoch": 1.06, |
|
"step": 17500 |
|
}, |
|
{ |
|
"loss": 0.062, |
|
"grad_norm": 0.8351789712905884, |
|
"learning_rate": 1.8681332036947012e-05, |
|
"epoch": 1.07, |
|
"step": 17600 |
|
}, |
|
{ |
|
"loss": 0.0604, |
|
"grad_norm": 0.7861587405204773, |
|
"learning_rate": 1.855979581915411e-05, |
|
"epoch": 1.07, |
|
"step": 17700 |
|
}, |
|
{ |
|
"loss": 0.0609, |
|
"grad_norm": 0.7295276522636414, |
|
"learning_rate": 1.843825960136121e-05, |
|
"epoch": 1.08, |
|
"step": 17800 |
|
}, |
|
{ |
|
"loss": 0.0616, |
|
"grad_norm": 1.0210868120193481, |
|
"learning_rate": 1.8316723383568306e-05, |
|
"epoch": 1.09, |
|
"step": 17900 |
|
}, |
|
{ |
|
"loss": 0.0616, |
|
"grad_norm": 0.8220874071121216, |
|
"learning_rate": 1.8195187165775403e-05, |
|
"epoch": 1.09, |
|
"step": 18000 |
|
}, |
|
{ |
|
"loss": 0.0607, |
|
"grad_norm": 0.7961727380752563, |
|
"learning_rate": 1.80736509479825e-05, |
|
"epoch": 1.1, |
|
"step": 18100 |
|
}, |
|
{ |
|
"loss": 0.0614, |
|
"grad_norm": 1.0390113592147827, |
|
"learning_rate": 1.79521147301896e-05, |
|
"epoch": 1.1, |
|
"step": 18200 |
|
}, |
|
{ |
|
"loss": 0.0625, |
|
"grad_norm": 0.8423497080802917, |
|
"learning_rate": 1.7830578512396696e-05, |
|
"epoch": 1.11, |
|
"step": 18300 |
|
}, |
|
{ |
|
"loss": 0.0618, |
|
"grad_norm": 0.7576957941055298, |
|
"learning_rate": 1.7709042294603793e-05, |
|
"epoch": 1.12, |
|
"step": 18400 |
|
}, |
|
{ |
|
"loss": 0.061, |
|
"grad_norm": 0.7174555659294128, |
|
"learning_rate": 1.7587506076810893e-05, |
|
"epoch": 1.12, |
|
"step": 18500 |
|
}, |
|
{ |
|
"loss": 0.0602, |
|
"grad_norm": 0.7977816462516785, |
|
"learning_rate": 1.746596985901799e-05, |
|
"epoch": 1.13, |
|
"step": 18600 |
|
}, |
|
{ |
|
"loss": 0.0617, |
|
"grad_norm": 0.8125550150871277, |
|
"learning_rate": 1.7344433641225086e-05, |
|
"epoch": 1.13, |
|
"step": 18700 |
|
}, |
|
{ |
|
"loss": 0.0605, |
|
"grad_norm": 1.3914258480072021, |
|
"learning_rate": 1.7222897423432183e-05, |
|
"epoch": 1.14, |
|
"step": 18800 |
|
}, |
|
{ |
|
"loss": 0.0614, |
|
"grad_norm": 0.8273860812187195, |
|
"learning_rate": 1.7101361205639283e-05, |
|
"epoch": 1.15, |
|
"step": 18900 |
|
}, |
|
{ |
|
"loss": 0.0606, |
|
"grad_norm": 0.7267687916755676, |
|
"learning_rate": 1.697982498784638e-05, |
|
"epoch": 1.15, |
|
"step": 19000 |
|
}, |
|
{ |
|
"loss": 0.0624, |
|
"grad_norm": 1.075861930847168, |
|
"learning_rate": 1.6858288770053476e-05, |
|
"epoch": 1.16, |
|
"step": 19100 |
|
}, |
|
{ |
|
"loss": 0.062, |
|
"grad_norm": 0.867139995098114, |
|
"learning_rate": 1.6736752552260576e-05, |
|
"epoch": 1.16, |
|
"step": 19200 |
|
}, |
|
{ |
|
"loss": 0.0595, |
|
"grad_norm": 0.6730388402938843, |
|
"learning_rate": 1.6615216334467673e-05, |
|
"epoch": 1.17, |
|
"step": 19300 |
|
}, |
|
{ |
|
"loss": 0.0603, |
|
"grad_norm": 0.7329290509223938, |
|
"learning_rate": 1.649368011667477e-05, |
|
"epoch": 1.18, |
|
"step": 19400 |
|
}, |
|
{ |
|
"loss": 0.0605, |
|
"grad_norm": 1.0000228881835938, |
|
"learning_rate": 1.6372143898881866e-05, |
|
"epoch": 1.18, |
|
"step": 19500 |
|
}, |
|
{ |
|
"loss": 0.0599, |
|
"grad_norm": 1.0037493705749512, |
|
"learning_rate": 1.6250607681088967e-05, |
|
"epoch": 1.19, |
|
"step": 19600 |
|
}, |
|
{ |
|
"loss": 0.0616, |
|
"grad_norm": 0.7647894024848938, |
|
"learning_rate": 1.6129071463296063e-05, |
|
"epoch": 1.2, |
|
"step": 19700 |
|
}, |
|
{ |
|
"loss": 0.0604, |
|
"grad_norm": 0.78948575258255, |
|
"learning_rate": 1.600753524550316e-05, |
|
"epoch": 1.2, |
|
"step": 19800 |
|
}, |
|
{ |
|
"loss": 0.0609, |
|
"grad_norm": 0.8443770408630371, |
|
"learning_rate": 1.588599902771026e-05, |
|
"epoch": 1.21, |
|
"step": 19900 |
|
}, |
|
{ |
|
"loss": 0.0599, |
|
"grad_norm": 1.1531789302825928, |
|
"learning_rate": 1.5764462809917357e-05, |
|
"epoch": 1.21, |
|
"step": 20000 |
|
}, |
|
{ |
|
"loss": 0.0605, |
|
"grad_norm": 0.7325319647789001, |
|
"learning_rate": 1.5642926592124453e-05, |
|
"epoch": 1.22, |
|
"step": 20100 |
|
}, |
|
{ |
|
"loss": 0.0606, |
|
"grad_norm": 0.8585038185119629, |
|
"learning_rate": 1.5521390374331553e-05, |
|
"epoch": 1.23, |
|
"step": 20200 |
|
}, |
|
{ |
|
"loss": 0.0602, |
|
"grad_norm": 0.6652311086654663, |
|
"learning_rate": 1.539985415653865e-05, |
|
"epoch": 1.23, |
|
"step": 20300 |
|
}, |
|
{ |
|
"loss": 0.0605, |
|
"grad_norm": 0.9240396618843079, |
|
"learning_rate": 1.5278317938745747e-05, |
|
"epoch": 1.24, |
|
"step": 20400 |
|
}, |
|
{ |
|
"loss": 0.0609, |
|
"grad_norm": 0.9992942214012146, |
|
"learning_rate": 1.5156781720952845e-05, |
|
"epoch": 1.24, |
|
"step": 20500 |
|
}, |
|
{ |
|
"loss": 0.0604, |
|
"grad_norm": 0.7454150915145874, |
|
"learning_rate": 1.5035245503159944e-05, |
|
"epoch": 1.25, |
|
"step": 20600 |
|
}, |
|
{ |
|
"loss": 0.0598, |
|
"grad_norm": 0.8551883101463318, |
|
"learning_rate": 1.491370928536704e-05, |
|
"epoch": 1.26, |
|
"step": 20700 |
|
}, |
|
{ |
|
"loss": 0.061, |
|
"grad_norm": 0.8273564577102661, |
|
"learning_rate": 1.4792173067574139e-05, |
|
"epoch": 1.26, |
|
"step": 20800 |
|
}, |
|
{ |
|
"loss": 0.06, |
|
"grad_norm": 0.925244927406311, |
|
"learning_rate": 1.4671852211959166e-05, |
|
"epoch": 1.27, |
|
"step": 20900 |
|
}, |
|
{ |
|
"loss": 0.0587, |
|
"grad_norm": 0.5892955660820007, |
|
"learning_rate": 1.4550315994166261e-05, |
|
"epoch": 1.27, |
|
"step": 21000 |
|
}, |
|
{ |
|
"loss": 0.0602, |
|
"grad_norm": 0.7904210090637207, |
|
"learning_rate": 1.4428779776373361e-05, |
|
"epoch": 1.28, |
|
"step": 21100 |
|
}, |
|
{ |
|
"loss": 0.0625, |
|
"grad_norm": 1.2804646492004395, |
|
"learning_rate": 1.430724355858046e-05, |
|
"epoch": 1.29, |
|
"step": 21200 |
|
}, |
|
{ |
|
"loss": 0.0607, |
|
"grad_norm": 0.9952909350395203, |
|
"learning_rate": 1.4185707340787556e-05, |
|
"epoch": 1.29, |
|
"step": 21300 |
|
}, |
|
{ |
|
"loss": 0.0602, |
|
"grad_norm": 0.9036094546318054, |
|
"learning_rate": 1.4064171122994655e-05, |
|
"epoch": 1.3, |
|
"step": 21400 |
|
}, |
|
{ |
|
"loss": 0.0594, |
|
"grad_norm": 0.8128438591957092, |
|
"learning_rate": 1.3942634905201751e-05, |
|
"epoch": 1.3, |
|
"step": 21500 |
|
}, |
|
{ |
|
"loss": 0.0593, |
|
"grad_norm": 0.786703884601593, |
|
"learning_rate": 1.382109868740885e-05, |
|
"epoch": 1.31, |
|
"step": 21600 |
|
}, |
|
{ |
|
"loss": 0.0604, |
|
"grad_norm": 1.107258677482605, |
|
"learning_rate": 1.3699562469615946e-05, |
|
"epoch": 1.32, |
|
"step": 21700 |
|
}, |
|
{ |
|
"loss": 0.0596, |
|
"grad_norm": 1.0990906953811646, |
|
"learning_rate": 1.3578026251823045e-05, |
|
"epoch": 1.32, |
|
"step": 21800 |
|
}, |
|
{ |
|
"loss": 0.0611, |
|
"grad_norm": 0.7040949463844299, |
|
"learning_rate": 1.3456490034030143e-05, |
|
"epoch": 1.33, |
|
"step": 21900 |
|
}, |
|
{ |
|
"loss": 0.0582, |
|
"grad_norm": 0.7568740248680115, |
|
"learning_rate": 1.333495381623724e-05, |
|
"epoch": 1.33, |
|
"step": 22000 |
|
}, |
|
{ |
|
"loss": 0.0595, |
|
"grad_norm": 0.6342681646347046, |
|
"learning_rate": 1.3213417598444338e-05, |
|
"epoch": 1.34, |
|
"step": 22100 |
|
}, |
|
{ |
|
"loss": 0.0597, |
|
"grad_norm": 0.7555422186851501, |
|
"learning_rate": 1.3091881380651435e-05, |
|
"epoch": 1.35, |
|
"step": 22200 |
|
}, |
|
{ |
|
"loss": 0.0587, |
|
"grad_norm": 0.8620259165763855, |
|
"learning_rate": 1.2970345162858533e-05, |
|
"epoch": 1.35, |
|
"step": 22300 |
|
}, |
|
{ |
|
"loss": 0.0586, |
|
"grad_norm": 1.4132779836654663, |
|
"learning_rate": 1.2848808945065632e-05, |
|
"epoch": 1.36, |
|
"step": 22400 |
|
}, |
|
{ |
|
"loss": 0.0594, |
|
"grad_norm": 0.9352446794509888, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"epoch": 1.37, |
|
"step": 22500 |
|
}, |
|
{ |
|
"loss": 0.0581, |
|
"grad_norm": 0.8808399438858032, |
|
"learning_rate": 1.2605736509479827e-05, |
|
"epoch": 1.37, |
|
"step": 22600 |
|
}, |
|
{ |
|
"loss": 0.0603, |
|
"grad_norm": 0.8254494071006775, |
|
"learning_rate": 1.2484200291686924e-05, |
|
"epoch": 1.38, |
|
"step": 22700 |
|
}, |
|
{ |
|
"loss": 0.0589, |
|
"grad_norm": 0.9145941138267517, |
|
"learning_rate": 1.2362664073894022e-05, |
|
"epoch": 1.38, |
|
"step": 22800 |
|
}, |
|
{ |
|
"loss": 0.0594, |
|
"grad_norm": 1.267179012298584, |
|
"learning_rate": 1.2241127856101119e-05, |
|
"epoch": 1.39, |
|
"step": 22900 |
|
}, |
|
{ |
|
"loss": 0.0585, |
|
"grad_norm": 0.9012957215309143, |
|
"learning_rate": 1.2119591638308217e-05, |
|
"epoch": 1.4, |
|
"step": 23000 |
|
}, |
|
{ |
|
"loss": 0.0581, |
|
"grad_norm": 1.053276777267456, |
|
"learning_rate": 1.1998055420515315e-05, |
|
"epoch": 1.4, |
|
"step": 23100 |
|
}, |
|
{ |
|
"loss": 0.0579, |
|
"grad_norm": 1.031724214553833, |
|
"learning_rate": 1.1876519202722412e-05, |
|
"epoch": 1.41, |
|
"step": 23200 |
|
}, |
|
{ |
|
"loss": 0.0574, |
|
"grad_norm": 0.8730105757713318, |
|
"learning_rate": 1.175498298492951e-05, |
|
"epoch": 1.41, |
|
"step": 23300 |
|
}, |
|
{ |
|
"loss": 0.0589, |
|
"grad_norm": 0.871724545955658, |
|
"learning_rate": 1.1633446767136607e-05, |
|
"epoch": 1.42, |
|
"step": 23400 |
|
}, |
|
{ |
|
"loss": 0.0585, |
|
"grad_norm": 0.9031744599342346, |
|
"learning_rate": 1.1511910549343706e-05, |
|
"epoch": 1.43, |
|
"step": 23500 |
|
}, |
|
{ |
|
"loss": 0.0586, |
|
"grad_norm": 0.5891318917274475, |
|
"learning_rate": 1.1390374331550802e-05, |
|
"epoch": 1.43, |
|
"step": 23600 |
|
}, |
|
{ |
|
"loss": 0.0584, |
|
"grad_norm": 0.7399836182594299, |
|
"learning_rate": 1.12688381137579e-05, |
|
"epoch": 1.44, |
|
"step": 23700 |
|
}, |
|
{ |
|
"loss": 0.0596, |
|
"grad_norm": 0.47165361046791077, |
|
"learning_rate": 1.1147301895964999e-05, |
|
"epoch": 1.44, |
|
"step": 23800 |
|
}, |
|
{ |
|
"loss": 0.0588, |
|
"grad_norm": 0.8805158734321594, |
|
"learning_rate": 1.1025765678172096e-05, |
|
"epoch": 1.45, |
|
"step": 23900 |
|
}, |
|
{ |
|
"loss": 0.0587, |
|
"grad_norm": 0.6524300575256348, |
|
"learning_rate": 1.0904229460379194e-05, |
|
"epoch": 1.46, |
|
"step": 24000 |
|
}, |
|
{ |
|
"loss": 0.0599, |
|
"grad_norm": 0.7314462661743164, |
|
"learning_rate": 1.078269324258629e-05, |
|
"epoch": 1.46, |
|
"step": 24100 |
|
}, |
|
{ |
|
"loss": 0.0587, |
|
"grad_norm": 0.7969116568565369, |
|
"learning_rate": 1.0661157024793389e-05, |
|
"epoch": 1.47, |
|
"step": 24200 |
|
}, |
|
{ |
|
"loss": 0.0574, |
|
"grad_norm": 0.6548510193824768, |
|
"learning_rate": 1.0539620807000488e-05, |
|
"epoch": 1.47, |
|
"step": 24300 |
|
}, |
|
{ |
|
"loss": 0.0601, |
|
"grad_norm": 0.6944112181663513, |
|
"learning_rate": 1.0418084589207584e-05, |
|
"epoch": 1.48, |
|
"step": 24400 |
|
}, |
|
{ |
|
"loss": 0.0595, |
|
"grad_norm": 1.0091618299484253, |
|
"learning_rate": 1.0296548371414683e-05, |
|
"epoch": 1.49, |
|
"step": 24500 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 0.7692497372627258, |
|
"learning_rate": 1.017501215362178e-05, |
|
"epoch": 1.49, |
|
"step": 24600 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 1.2263282537460327, |
|
"learning_rate": 1.0053475935828878e-05, |
|
"epoch": 1.5, |
|
"step": 24700 |
|
}, |
|
{ |
|
"loss": 0.058, |
|
"grad_norm": 1.412335753440857, |
|
"learning_rate": 9.931939718035976e-06, |
|
"epoch": 1.5, |
|
"step": 24800 |
|
}, |
|
{ |
|
"loss": 0.0584, |
|
"grad_norm": 0.9114163517951965, |
|
"learning_rate": 9.810403500243073e-06, |
|
"epoch": 1.51, |
|
"step": 24900 |
|
}, |
|
{ |
|
"loss": 0.0579, |
|
"grad_norm": 0.8343012928962708, |
|
"learning_rate": 9.688867282450171e-06, |
|
"epoch": 1.52, |
|
"step": 25000 |
|
}, |
|
{ |
|
"loss": 0.0581, |
|
"grad_norm": 0.7137165665626526, |
|
"learning_rate": 9.567331064657268e-06, |
|
"epoch": 1.52, |
|
"step": 25100 |
|
}, |
|
{ |
|
"loss": 0.0572, |
|
"grad_norm": 0.8871126174926758, |
|
"learning_rate": 9.445794846864366e-06, |
|
"epoch": 1.53, |
|
"step": 25200 |
|
}, |
|
{ |
|
"loss": 0.0588, |
|
"grad_norm": 1.9913699626922607, |
|
"learning_rate": 9.324258629071465e-06, |
|
"epoch": 1.54, |
|
"step": 25300 |
|
}, |
|
{ |
|
"loss": 0.0586, |
|
"grad_norm": 0.702129065990448, |
|
"learning_rate": 9.202722411278561e-06, |
|
"epoch": 1.54, |
|
"step": 25400 |
|
}, |
|
{ |
|
"loss": 0.0589, |
|
"grad_norm": 0.759503960609436, |
|
"learning_rate": 9.08118619348566e-06, |
|
"epoch": 1.55, |
|
"step": 25500 |
|
}, |
|
{ |
|
"loss": 0.0598, |
|
"grad_norm": 0.7731884717941284, |
|
"learning_rate": 8.959649975692756e-06, |
|
"epoch": 1.55, |
|
"step": 25600 |
|
}, |
|
{ |
|
"loss": 0.0574, |
|
"grad_norm": 0.830560028553009, |
|
"learning_rate": 8.838113757899855e-06, |
|
"epoch": 1.56, |
|
"step": 25700 |
|
}, |
|
{ |
|
"loss": 0.0561, |
|
"grad_norm": 0.612714946269989, |
|
"learning_rate": 8.716577540106953e-06, |
|
"epoch": 1.57, |
|
"step": 25800 |
|
}, |
|
{ |
|
"loss": 0.0583, |
|
"grad_norm": 0.6476453542709351, |
|
"learning_rate": 8.59504132231405e-06, |
|
"epoch": 1.57, |
|
"step": 25900 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 0.6660561561584473, |
|
"learning_rate": 8.473505104521148e-06, |
|
"epoch": 1.58, |
|
"step": 26000 |
|
}, |
|
{ |
|
"loss": 0.0575, |
|
"grad_norm": 0.6638226509094238, |
|
"learning_rate": 8.351968886728245e-06, |
|
"epoch": 1.58, |
|
"step": 26100 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 0.6452857255935669, |
|
"learning_rate": 8.231648031113272e-06, |
|
"epoch": 1.59, |
|
"step": 26200 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 0.819333016872406, |
|
"learning_rate": 8.11011181332037e-06, |
|
"epoch": 1.6, |
|
"step": 26300 |
|
}, |
|
{ |
|
"loss": 0.0571, |
|
"grad_norm": 1.2114768028259277, |
|
"learning_rate": 7.988575595527467e-06, |
|
"epoch": 1.6, |
|
"step": 26400 |
|
}, |
|
{ |
|
"loss": 0.0577, |
|
"grad_norm": 0.7581117153167725, |
|
"learning_rate": 7.867039377734566e-06, |
|
"epoch": 1.61, |
|
"step": 26500 |
|
}, |
|
{ |
|
"loss": 0.0575, |
|
"grad_norm": 0.5861278772354126, |
|
"learning_rate": 7.745503159941663e-06, |
|
"epoch": 1.61, |
|
"step": 26600 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 0.7154746055603027, |
|
"learning_rate": 7.623966942148761e-06, |
|
"epoch": 1.62, |
|
"step": 26700 |
|
}, |
|
{ |
|
"loss": 0.0574, |
|
"grad_norm": 1.072407841682434, |
|
"learning_rate": 7.502430724355859e-06, |
|
"epoch": 1.63, |
|
"step": 26800 |
|
}, |
|
{ |
|
"loss": 0.0572, |
|
"grad_norm": 0.8198044896125793, |
|
"learning_rate": 7.380894506562957e-06, |
|
"epoch": 1.63, |
|
"step": 26900 |
|
}, |
|
{ |
|
"loss": 0.0562, |
|
"grad_norm": 0.7912253141403198, |
|
"learning_rate": 7.259358288770054e-06, |
|
"epoch": 1.64, |
|
"step": 27000 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 0.9015645980834961, |
|
"learning_rate": 7.137822070977152e-06, |
|
"epoch": 1.64, |
|
"step": 27100 |
|
}, |
|
{ |
|
"loss": 0.0551, |
|
"grad_norm": 0.6205886602401733, |
|
"learning_rate": 7.0162858531842495e-06, |
|
"epoch": 1.65, |
|
"step": 27200 |
|
}, |
|
{ |
|
"loss": 0.0581, |
|
"grad_norm": 0.8834924697875977, |
|
"learning_rate": 6.894749635391347e-06, |
|
"epoch": 1.66, |
|
"step": 27300 |
|
}, |
|
{ |
|
"loss": 0.0565, |
|
"grad_norm": 0.7698688507080078, |
|
"learning_rate": 6.773213417598445e-06, |
|
"epoch": 1.66, |
|
"step": 27400 |
|
}, |
|
{ |
|
"loss": 0.0575, |
|
"grad_norm": 0.8447450399398804, |
|
"learning_rate": 6.651677199805543e-06, |
|
"epoch": 1.67, |
|
"step": 27500 |
|
}, |
|
{ |
|
"loss": 0.057, |
|
"grad_norm": 1.6002224683761597, |
|
"learning_rate": 6.5301409820126404e-06, |
|
"epoch": 1.67, |
|
"step": 27600 |
|
}, |
|
{ |
|
"loss": 0.0558, |
|
"grad_norm": 0.8625892996788025, |
|
"learning_rate": 6.408604764219738e-06, |
|
"epoch": 1.68, |
|
"step": 27700 |
|
}, |
|
{ |
|
"loss": 0.0566, |
|
"grad_norm": 0.7483322024345398, |
|
"learning_rate": 6.2870685464268355e-06, |
|
"epoch": 1.69, |
|
"step": 27800 |
|
}, |
|
{ |
|
"loss": 0.0571, |
|
"grad_norm": 0.781535804271698, |
|
"learning_rate": 6.165532328633933e-06, |
|
"epoch": 1.69, |
|
"step": 27900 |
|
}, |
|
{ |
|
"loss": 0.0563, |
|
"grad_norm": 0.8761783838272095, |
|
"learning_rate": 6.0439961108410314e-06, |
|
"epoch": 1.7, |
|
"step": 28000 |
|
}, |
|
{ |
|
"loss": 0.0565, |
|
"grad_norm": 0.5183244943618774, |
|
"learning_rate": 5.922459893048129e-06, |
|
"epoch": 1.7, |
|
"step": 28100 |
|
}, |
|
{ |
|
"loss": 0.0564, |
|
"grad_norm": 0.7939796447753906, |
|
"learning_rate": 5.8009236752552265e-06, |
|
"epoch": 1.71, |
|
"step": 28200 |
|
}, |
|
{ |
|
"loss": 0.0576, |
|
"grad_norm": 0.7260966300964355, |
|
"learning_rate": 5.679387457462324e-06, |
|
"epoch": 1.72, |
|
"step": 28300 |
|
}, |
|
{ |
|
"loss": 0.0569, |
|
"grad_norm": 0.9087544083595276, |
|
"learning_rate": 5.557851239669422e-06, |
|
"epoch": 1.72, |
|
"step": 28400 |
|
}, |
|
{ |
|
"loss": 0.056, |
|
"grad_norm": 0.7275218367576599, |
|
"learning_rate": 5.436315021876519e-06, |
|
"epoch": 1.73, |
|
"step": 28500 |
|
}, |
|
{ |
|
"loss": 0.0563, |
|
"grad_norm": 0.5983753800392151, |
|
"learning_rate": 5.315994166261547e-06, |
|
"epoch": 1.74, |
|
"step": 28600 |
|
}, |
|
{ |
|
"loss": 0.0564, |
|
"grad_norm": 0.912756085395813, |
|
"learning_rate": 5.194457948468644e-06, |
|
"epoch": 1.74, |
|
"step": 28700 |
|
}, |
|
{ |
|
"loss": 0.0555, |
|
"grad_norm": 0.6085710525512695, |
|
"learning_rate": 5.072921730675742e-06, |
|
"epoch": 1.75, |
|
"step": 28800 |
|
}, |
|
{ |
|
"loss": 0.0571, |
|
"grad_norm": 0.6775307655334473, |
|
"learning_rate": 4.95138551288284e-06, |
|
"epoch": 1.75, |
|
"step": 28900 |
|
}, |
|
{ |
|
"loss": 0.0543, |
|
"grad_norm": 0.7438898682594299, |
|
"learning_rate": 4.829849295089938e-06, |
|
"epoch": 1.76, |
|
"step": 29000 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 0.719668984413147, |
|
"learning_rate": 4.708313077297035e-06, |
|
"epoch": 1.77, |
|
"step": 29100 |
|
}, |
|
{ |
|
"loss": 0.0565, |
|
"grad_norm": 0.8647979497909546, |
|
"learning_rate": 4.586776859504133e-06, |
|
"epoch": 1.77, |
|
"step": 29200 |
|
}, |
|
{ |
|
"loss": 0.057, |
|
"grad_norm": 0.8238335847854614, |
|
"learning_rate": 4.46524064171123e-06, |
|
"epoch": 1.78, |
|
"step": 29300 |
|
}, |
|
{ |
|
"loss": 0.0563, |
|
"grad_norm": 3.2504589557647705, |
|
"learning_rate": 4.343704423918328e-06, |
|
"epoch": 1.78, |
|
"step": 29400 |
|
}, |
|
{ |
|
"loss": 0.0536, |
|
"grad_norm": 0.7106683850288391, |
|
"learning_rate": 4.222168206125426e-06, |
|
"epoch": 1.79, |
|
"step": 29500 |
|
}, |
|
{ |
|
"loss": 0.056, |
|
"grad_norm": 0.9477577209472656, |
|
"learning_rate": 4.100631988332524e-06, |
|
"epoch": 1.8, |
|
"step": 29600 |
|
}, |
|
{ |
|
"loss": 0.0562, |
|
"grad_norm": 0.8888897895812988, |
|
"learning_rate": 3.979095770539621e-06, |
|
"epoch": 1.8, |
|
"step": 29700 |
|
}, |
|
{ |
|
"loss": 0.0562, |
|
"grad_norm": 0.7125309705734253, |
|
"learning_rate": 3.857559552746719e-06, |
|
"epoch": 1.81, |
|
"step": 29800 |
|
}, |
|
{ |
|
"loss": 0.0552, |
|
"grad_norm": 0.7241693139076233, |
|
"learning_rate": 3.7360233349538167e-06, |
|
"epoch": 1.81, |
|
"step": 29900 |
|
}, |
|
{ |
|
"loss": 0.0556, |
|
"grad_norm": 0.9381842613220215, |
|
"learning_rate": 3.6144871171609143e-06, |
|
"epoch": 1.82, |
|
"step": 30000 |
|
}, |
|
{ |
|
"loss": 0.0551, |
|
"grad_norm": 0.6808192133903503, |
|
"learning_rate": 3.492950899368012e-06, |
|
"epoch": 1.83, |
|
"step": 30100 |
|
}, |
|
{ |
|
"loss": 0.0561, |
|
"grad_norm": 0.6042631268501282, |
|
"learning_rate": 3.3714146815751098e-06, |
|
"epoch": 1.83, |
|
"step": 30200 |
|
}, |
|
{ |
|
"loss": 0.0553, |
|
"grad_norm": 0.5585273504257202, |
|
"learning_rate": 3.2498784637822073e-06, |
|
"epoch": 1.84, |
|
"step": 30300 |
|
}, |
|
{ |
|
"loss": 0.0545, |
|
"grad_norm": 0.9048868417739868, |
|
"learning_rate": 3.128342245989305e-06, |
|
"epoch": 1.84, |
|
"step": 30400 |
|
}, |
|
{ |
|
"loss": 0.0557, |
|
"grad_norm": 0.8429957628250122, |
|
"learning_rate": 3.006806028196403e-06, |
|
"epoch": 1.85, |
|
"step": 30500 |
|
}, |
|
{ |
|
"loss": 0.0563, |
|
"grad_norm": 0.7962875962257385, |
|
"learning_rate": 2.8852698104035003e-06, |
|
"epoch": 1.86, |
|
"step": 30600 |
|
}, |
|
{ |
|
"loss": 0.0559, |
|
"grad_norm": 0.7854676246643066, |
|
"learning_rate": 2.763733592610598e-06, |
|
"epoch": 1.86, |
|
"step": 30700 |
|
}, |
|
{ |
|
"loss": 0.0561, |
|
"grad_norm": 1.694869041442871, |
|
"learning_rate": 2.642197374817696e-06, |
|
"epoch": 1.87, |
|
"step": 30800 |
|
}, |
|
{ |
|
"loss": 0.0568, |
|
"grad_norm": 0.6683087944984436, |
|
"learning_rate": 2.5206611570247934e-06, |
|
"epoch": 1.87, |
|
"step": 30900 |
|
}, |
|
{ |
|
"loss": 0.0548, |
|
"grad_norm": 0.5675504803657532, |
|
"learning_rate": 2.3991249392318913e-06, |
|
"epoch": 1.88, |
|
"step": 31000 |
|
}, |
|
{ |
|
"loss": 0.0552, |
|
"grad_norm": 0.9730797410011292, |
|
"learning_rate": 2.2775887214389893e-06, |
|
"epoch": 1.89, |
|
"step": 31100 |
|
}, |
|
{ |
|
"loss": 0.0568, |
|
"grad_norm": 0.8015105128288269, |
|
"learning_rate": 2.156052503646087e-06, |
|
"epoch": 1.89, |
|
"step": 31200 |
|
}, |
|
{ |
|
"loss": 0.0552, |
|
"grad_norm": 0.5437925457954407, |
|
"learning_rate": 2.0345162858531844e-06, |
|
"epoch": 1.9, |
|
"step": 31300 |
|
}, |
|
{ |
|
"loss": 0.0558, |
|
"grad_norm": 0.8105918765068054, |
|
"learning_rate": 1.9129800680602823e-06, |
|
"epoch": 1.91, |
|
"step": 31400 |
|
}, |
|
{ |
|
"loss": 0.0567, |
|
"grad_norm": 0.8699814677238464, |
|
"learning_rate": 1.7914438502673799e-06, |
|
"epoch": 1.91, |
|
"step": 31500 |
|
}, |
|
{ |
|
"loss": 0.0556, |
|
"grad_norm": 0.542261004447937, |
|
"learning_rate": 1.6699076324744776e-06, |
|
"epoch": 1.92, |
|
"step": 31600 |
|
}, |
|
{ |
|
"loss": 0.0553, |
|
"grad_norm": 0.6852170825004578, |
|
"learning_rate": 1.5483714146815754e-06, |
|
"epoch": 1.92, |
|
"step": 31700 |
|
}, |
|
{ |
|
"loss": 0.0559, |
|
"grad_norm": 0.8324136137962341, |
|
"learning_rate": 1.426835196888673e-06, |
|
"epoch": 1.93, |
|
"step": 31800 |
|
}, |
|
{ |
|
"loss": 0.0539, |
|
"grad_norm": 0.5395376086235046, |
|
"learning_rate": 1.3052989790957707e-06, |
|
"epoch": 1.94, |
|
"step": 31900 |
|
}, |
|
{ |
|
"loss": 0.0557, |
|
"grad_norm": 1.0665556192398071, |
|
"learning_rate": 1.1837627613028684e-06, |
|
"epoch": 1.94, |
|
"step": 32000 |
|
}, |
|
{ |
|
"loss": 0.0556, |
|
"grad_norm": 0.5730076432228088, |
|
"learning_rate": 1.062226543509966e-06, |
|
"epoch": 1.95, |
|
"step": 32100 |
|
}, |
|
{ |
|
"loss": 0.0566, |
|
"grad_norm": 0.8526155352592468, |
|
"learning_rate": 9.406903257170638e-07, |
|
"epoch": 1.95, |
|
"step": 32200 |
|
}, |
|
{ |
|
"loss": 0.0554, |
|
"grad_norm": 0.47227638959884644, |
|
"learning_rate": 8.191541079241614e-07, |
|
"epoch": 1.96, |
|
"step": 32300 |
|
}, |
|
{ |
|
"loss": 0.0559, |
|
"grad_norm": 0.5771980881690979, |
|
"learning_rate": 6.976178901312592e-07, |
|
"epoch": 1.97, |
|
"step": 32400 |
|
}, |
|
{ |
|
"loss": 0.0553, |
|
"grad_norm": 0.7183811068534851, |
|
"learning_rate": 5.772970345162859e-07, |
|
"epoch": 1.97, |
|
"step": 32500 |
|
}, |
|
{ |
|
"loss": 0.0556, |
|
"grad_norm": 0.7808952927589417, |
|
"learning_rate": 4.557608167233836e-07, |
|
"epoch": 1.98, |
|
"step": 32600 |
|
}, |
|
{ |
|
"loss": 0.0549, |
|
"grad_norm": 0.7201197743415833, |
|
"learning_rate": 3.3422459893048135e-07, |
|
"epoch": 1.98, |
|
"step": 32700 |
|
}, |
|
{ |
|
"loss": 0.0546, |
|
"grad_norm": 0.822515606880188, |
|
"learning_rate": 2.1268838113757902e-07, |
|
"epoch": 1.99, |
|
"step": 32800 |
|
}, |
|
{ |
|
"loss": 0.0556, |
|
"grad_norm": 0.6968460083007812, |
|
"learning_rate": 9.115216334467672e-08, |
|
"epoch": 2.0, |
|
"step": 32900 |
|
}, |
|
{ |
|
"eval_loss": 0.06514331698417664, |
|
"eval_f1": 0.9055283859012663, |
|
"eval_precision": 0.9128121708644065, |
|
"eval_recall": 0.898553824781504, |
|
"eval_accuracy": 0.9750088848296079, |
|
"eval_runtime": 304.326, |
|
"eval_samples_per_second": 86.841, |
|
"eval_steps_per_second": 10.857, |
|
"epoch": 2.0, |
|
"step": 32962 |
|
}, |
|
{ |
|
"train_runtime": 12949.9436, |
|
"train_samples_per_second": 20.363, |
|
"train_steps_per_second": 2.545, |
|
"total_flos": 2.448996403000443e+17, |
|
"train_loss": 0.07225221031304233, |
|
"epoch": 2.0, |
|
"step": 32962 |
|
} |
|
] |
|
} |
|
} |
|
} |