xlm-roberta-base-finetuned-arabic / trainer_state.json
Davlan's picture
Upload trainer_state.json
ea50a15
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 141057,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.982276668297213e-05,
"loss": 2.4577,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.964553336594426e-05,
"loss": 2.4076,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 4.94683000489164e-05,
"loss": 2.3823,
"step": 1500
},
{
"epoch": 0.04,
"learning_rate": 4.929106673188853e-05,
"loss": 2.3764,
"step": 2000
},
{
"epoch": 0.05,
"learning_rate": 4.911383341486066e-05,
"loss": 2.3482,
"step": 2500
},
{
"epoch": 0.06,
"learning_rate": 4.893660009783279e-05,
"loss": 2.3238,
"step": 3000
},
{
"epoch": 0.07,
"learning_rate": 4.8759366780804925e-05,
"loss": 2.3344,
"step": 3500
},
{
"epoch": 0.09,
"learning_rate": 4.8582133463777055e-05,
"loss": 2.3137,
"step": 4000
},
{
"epoch": 0.1,
"learning_rate": 4.840490014674919e-05,
"loss": 2.3036,
"step": 4500
},
{
"epoch": 0.11,
"learning_rate": 4.822766682972132e-05,
"loss": 2.2799,
"step": 5000
},
{
"epoch": 0.12,
"learning_rate": 4.805043351269346e-05,
"loss": 2.283,
"step": 5500
},
{
"epoch": 0.13,
"learning_rate": 4.787320019566559e-05,
"loss": 2.271,
"step": 6000
},
{
"epoch": 0.14,
"learning_rate": 4.769596687863772e-05,
"loss": 2.267,
"step": 6500
},
{
"epoch": 0.15,
"learning_rate": 4.751873356160985e-05,
"loss": 2.2637,
"step": 7000
},
{
"epoch": 0.16,
"learning_rate": 4.7341500244581984e-05,
"loss": 2.2565,
"step": 7500
},
{
"epoch": 0.17,
"learning_rate": 4.7164266927554114e-05,
"loss": 2.2501,
"step": 8000
},
{
"epoch": 0.18,
"learning_rate": 4.6987033610526244e-05,
"loss": 2.2233,
"step": 8500
},
{
"epoch": 0.19,
"learning_rate": 4.6809800293498373e-05,
"loss": 2.2321,
"step": 9000
},
{
"epoch": 0.2,
"learning_rate": 4.663256697647051e-05,
"loss": 2.221,
"step": 9500
},
{
"epoch": 0.21,
"learning_rate": 4.645533365944264e-05,
"loss": 2.2343,
"step": 10000
},
{
"epoch": 0.22,
"learning_rate": 4.627810034241477e-05,
"loss": 2.2038,
"step": 10500
},
{
"epoch": 0.23,
"learning_rate": 4.61008670253869e-05,
"loss": 2.2013,
"step": 11000
},
{
"epoch": 0.24,
"learning_rate": 4.592363370835903e-05,
"loss": 2.2105,
"step": 11500
},
{
"epoch": 0.26,
"learning_rate": 4.5746400391331166e-05,
"loss": 2.1881,
"step": 12000
},
{
"epoch": 0.27,
"learning_rate": 4.5569167074303296e-05,
"loss": 2.1832,
"step": 12500
},
{
"epoch": 0.28,
"learning_rate": 4.5391933757275426e-05,
"loss": 2.1774,
"step": 13000
},
{
"epoch": 0.29,
"learning_rate": 4.521470044024756e-05,
"loss": 2.1714,
"step": 13500
},
{
"epoch": 0.3,
"learning_rate": 4.503746712321969e-05,
"loss": 2.1602,
"step": 14000
},
{
"epoch": 0.31,
"learning_rate": 4.486023380619183e-05,
"loss": 2.1583,
"step": 14500
},
{
"epoch": 0.32,
"learning_rate": 4.468300048916396e-05,
"loss": 2.1707,
"step": 15000
},
{
"epoch": 0.33,
"learning_rate": 4.450576717213609e-05,
"loss": 2.1689,
"step": 15500
},
{
"epoch": 0.34,
"learning_rate": 4.4328533855108225e-05,
"loss": 2.1479,
"step": 16000
},
{
"epoch": 0.35,
"learning_rate": 4.4151300538080355e-05,
"loss": 2.1554,
"step": 16500
},
{
"epoch": 0.36,
"learning_rate": 4.3974067221052485e-05,
"loss": 2.1309,
"step": 17000
},
{
"epoch": 0.37,
"learning_rate": 4.3796833904024615e-05,
"loss": 2.1328,
"step": 17500
},
{
"epoch": 0.38,
"learning_rate": 4.361960058699675e-05,
"loss": 2.1469,
"step": 18000
},
{
"epoch": 0.39,
"learning_rate": 4.344236726996888e-05,
"loss": 2.1316,
"step": 18500
},
{
"epoch": 0.4,
"learning_rate": 4.326513395294101e-05,
"loss": 2.1387,
"step": 19000
},
{
"epoch": 0.41,
"learning_rate": 4.308790063591314e-05,
"loss": 2.1143,
"step": 19500
},
{
"epoch": 0.43,
"learning_rate": 4.291066731888528e-05,
"loss": 2.1301,
"step": 20000
},
{
"epoch": 0.44,
"learning_rate": 4.273343400185741e-05,
"loss": 2.1275,
"step": 20500
},
{
"epoch": 0.45,
"learning_rate": 4.255620068482954e-05,
"loss": 2.1172,
"step": 21000
},
{
"epoch": 0.46,
"learning_rate": 4.237896736780167e-05,
"loss": 2.118,
"step": 21500
},
{
"epoch": 0.47,
"learning_rate": 4.2201734050773804e-05,
"loss": 2.1287,
"step": 22000
},
{
"epoch": 0.48,
"learning_rate": 4.202450073374593e-05,
"loss": 2.1148,
"step": 22500
},
{
"epoch": 0.49,
"learning_rate": 4.184726741671807e-05,
"loss": 2.1019,
"step": 23000
},
{
"epoch": 0.5,
"learning_rate": 4.16700340996902e-05,
"loss": 2.1088,
"step": 23500
},
{
"epoch": 0.51,
"learning_rate": 4.149280078266233e-05,
"loss": 2.1092,
"step": 24000
},
{
"epoch": 0.52,
"learning_rate": 4.1315567465634466e-05,
"loss": 2.0987,
"step": 24500
},
{
"epoch": 0.53,
"learning_rate": 4.1138334148606596e-05,
"loss": 2.0778,
"step": 25000
},
{
"epoch": 0.54,
"learning_rate": 4.0961100831578726e-05,
"loss": 2.0963,
"step": 25500
},
{
"epoch": 0.55,
"learning_rate": 4.0783867514550856e-05,
"loss": 2.0868,
"step": 26000
},
{
"epoch": 0.56,
"learning_rate": 4.060663419752299e-05,
"loss": 2.0881,
"step": 26500
},
{
"epoch": 0.57,
"learning_rate": 4.042940088049512e-05,
"loss": 2.0887,
"step": 27000
},
{
"epoch": 0.58,
"learning_rate": 4.025216756346725e-05,
"loss": 2.0921,
"step": 27500
},
{
"epoch": 0.6,
"learning_rate": 4.007493424643938e-05,
"loss": 2.0782,
"step": 28000
},
{
"epoch": 0.61,
"learning_rate": 3.989770092941152e-05,
"loss": 2.0626,
"step": 28500
},
{
"epoch": 0.62,
"learning_rate": 3.972046761238365e-05,
"loss": 2.077,
"step": 29000
},
{
"epoch": 0.63,
"learning_rate": 3.954323429535578e-05,
"loss": 2.0728,
"step": 29500
},
{
"epoch": 0.64,
"learning_rate": 3.936600097832791e-05,
"loss": 2.0528,
"step": 30000
},
{
"epoch": 0.65,
"learning_rate": 3.918876766130004e-05,
"loss": 2.0661,
"step": 30500
},
{
"epoch": 0.66,
"learning_rate": 3.9011534344272175e-05,
"loss": 2.0639,
"step": 31000
},
{
"epoch": 0.67,
"learning_rate": 3.8834301027244304e-05,
"loss": 2.063,
"step": 31500
},
{
"epoch": 0.68,
"learning_rate": 3.865706771021644e-05,
"loss": 2.0445,
"step": 32000
},
{
"epoch": 0.69,
"learning_rate": 3.847983439318857e-05,
"loss": 2.0514,
"step": 32500
},
{
"epoch": 0.7,
"learning_rate": 3.830260107616071e-05,
"loss": 2.0538,
"step": 33000
},
{
"epoch": 0.71,
"learning_rate": 3.812536775913284e-05,
"loss": 2.041,
"step": 33500
},
{
"epoch": 0.72,
"learning_rate": 3.794813444210497e-05,
"loss": 2.0441,
"step": 34000
},
{
"epoch": 0.73,
"learning_rate": 3.77709011250771e-05,
"loss": 2.044,
"step": 34500
},
{
"epoch": 0.74,
"learning_rate": 3.7593667808049234e-05,
"loss": 2.0449,
"step": 35000
},
{
"epoch": 0.76,
"learning_rate": 3.7416434491021363e-05,
"loss": 2.0381,
"step": 35500
},
{
"epoch": 0.77,
"learning_rate": 3.723920117399349e-05,
"loss": 2.0302,
"step": 36000
},
{
"epoch": 0.78,
"learning_rate": 3.706196785696562e-05,
"loss": 2.028,
"step": 36500
},
{
"epoch": 0.79,
"learning_rate": 3.688473453993776e-05,
"loss": 2.0335,
"step": 37000
},
{
"epoch": 0.8,
"learning_rate": 3.670750122290989e-05,
"loss": 2.0289,
"step": 37500
},
{
"epoch": 0.81,
"learning_rate": 3.653026790588202e-05,
"loss": 2.0072,
"step": 38000
},
{
"epoch": 0.82,
"learning_rate": 3.635303458885415e-05,
"loss": 2.0341,
"step": 38500
},
{
"epoch": 0.83,
"learning_rate": 3.6175801271826286e-05,
"loss": 2.0233,
"step": 39000
},
{
"epoch": 0.84,
"learning_rate": 3.5998567954798416e-05,
"loss": 2.0146,
"step": 39500
},
{
"epoch": 0.85,
"learning_rate": 3.5821334637770546e-05,
"loss": 2.0205,
"step": 40000
},
{
"epoch": 0.86,
"learning_rate": 3.5644101320742675e-05,
"loss": 2.009,
"step": 40500
},
{
"epoch": 0.87,
"learning_rate": 3.546686800371481e-05,
"loss": 2.0023,
"step": 41000
},
{
"epoch": 0.88,
"learning_rate": 3.528963468668695e-05,
"loss": 2.0102,
"step": 41500
},
{
"epoch": 0.89,
"learning_rate": 3.511240136965908e-05,
"loss": 2.0074,
"step": 42000
},
{
"epoch": 0.9,
"learning_rate": 3.493516805263121e-05,
"loss": 1.9968,
"step": 42500
},
{
"epoch": 0.91,
"learning_rate": 3.475793473560334e-05,
"loss": 1.9968,
"step": 43000
},
{
"epoch": 0.93,
"learning_rate": 3.4580701418575475e-05,
"loss": 2.0042,
"step": 43500
},
{
"epoch": 0.94,
"learning_rate": 3.4403468101547605e-05,
"loss": 1.9947,
"step": 44000
},
{
"epoch": 0.95,
"learning_rate": 3.4226234784519735e-05,
"loss": 1.9995,
"step": 44500
},
{
"epoch": 0.96,
"learning_rate": 3.4049001467491864e-05,
"loss": 1.9929,
"step": 45000
},
{
"epoch": 0.97,
"learning_rate": 3.3871768150464e-05,
"loss": 1.9935,
"step": 45500
},
{
"epoch": 0.98,
"learning_rate": 3.369453483343613e-05,
"loss": 1.9931,
"step": 46000
},
{
"epoch": 0.99,
"learning_rate": 3.351730151640826e-05,
"loss": 1.9944,
"step": 46500
},
{
"epoch": 1.0,
"learning_rate": 3.334006819938039e-05,
"loss": 1.9779,
"step": 47000
},
{
"epoch": 1.01,
"learning_rate": 3.316283488235253e-05,
"loss": 1.9819,
"step": 47500
},
{
"epoch": 1.02,
"learning_rate": 3.298560156532466e-05,
"loss": 1.9798,
"step": 48000
},
{
"epoch": 1.03,
"learning_rate": 3.280836824829679e-05,
"loss": 1.9767,
"step": 48500
},
{
"epoch": 1.04,
"learning_rate": 3.263113493126892e-05,
"loss": 1.963,
"step": 49000
},
{
"epoch": 1.05,
"learning_rate": 3.245390161424105e-05,
"loss": 1.9686,
"step": 49500
},
{
"epoch": 1.06,
"learning_rate": 3.227666829721318e-05,
"loss": 1.9529,
"step": 50000
},
{
"epoch": 1.07,
"learning_rate": 3.209943498018532e-05,
"loss": 1.96,
"step": 50500
},
{
"epoch": 1.08,
"learning_rate": 3.192220166315745e-05,
"loss": 1.9683,
"step": 51000
},
{
"epoch": 1.1,
"learning_rate": 3.174496834612958e-05,
"loss": 1.9574,
"step": 51500
},
{
"epoch": 1.11,
"learning_rate": 3.1567735029101716e-05,
"loss": 1.9573,
"step": 52000
},
{
"epoch": 1.12,
"learning_rate": 3.1390501712073846e-05,
"loss": 1.9582,
"step": 52500
},
{
"epoch": 1.13,
"learning_rate": 3.1213268395045976e-05,
"loss": 1.9559,
"step": 53000
},
{
"epoch": 1.14,
"learning_rate": 3.1036035078018106e-05,
"loss": 1.9464,
"step": 53500
},
{
"epoch": 1.15,
"learning_rate": 3.085880176099024e-05,
"loss": 1.9512,
"step": 54000
},
{
"epoch": 1.16,
"learning_rate": 3.068156844396237e-05,
"loss": 1.9676,
"step": 54500
},
{
"epoch": 1.17,
"learning_rate": 3.0504335126934502e-05,
"loss": 1.946,
"step": 55000
},
{
"epoch": 1.18,
"learning_rate": 3.0327101809906632e-05,
"loss": 1.9586,
"step": 55500
},
{
"epoch": 1.19,
"learning_rate": 3.014986849287877e-05,
"loss": 1.9553,
"step": 56000
},
{
"epoch": 1.2,
"learning_rate": 2.9972635175850898e-05,
"loss": 1.9424,
"step": 56500
},
{
"epoch": 1.21,
"learning_rate": 2.979540185882303e-05,
"loss": 1.9431,
"step": 57000
},
{
"epoch": 1.22,
"learning_rate": 2.961816854179516e-05,
"loss": 1.9562,
"step": 57500
},
{
"epoch": 1.23,
"learning_rate": 2.9440935224767298e-05,
"loss": 1.9376,
"step": 58000
},
{
"epoch": 1.24,
"learning_rate": 2.9263701907739428e-05,
"loss": 1.9313,
"step": 58500
},
{
"epoch": 1.25,
"learning_rate": 2.9086468590711558e-05,
"loss": 1.9492,
"step": 59000
},
{
"epoch": 1.27,
"learning_rate": 2.8909235273683687e-05,
"loss": 1.9396,
"step": 59500
},
{
"epoch": 1.28,
"learning_rate": 2.8732001956655824e-05,
"loss": 1.9343,
"step": 60000
},
{
"epoch": 1.29,
"learning_rate": 2.8554768639627954e-05,
"loss": 1.9446,
"step": 60500
},
{
"epoch": 1.3,
"learning_rate": 2.8377535322600084e-05,
"loss": 1.9325,
"step": 61000
},
{
"epoch": 1.31,
"learning_rate": 2.8200302005572217e-05,
"loss": 1.9321,
"step": 61500
},
{
"epoch": 1.32,
"learning_rate": 2.8023068688544347e-05,
"loss": 1.9295,
"step": 62000
},
{
"epoch": 1.33,
"learning_rate": 2.7845835371516483e-05,
"loss": 1.928,
"step": 62500
},
{
"epoch": 1.34,
"learning_rate": 2.7668602054488613e-05,
"loss": 1.939,
"step": 63000
},
{
"epoch": 1.35,
"learning_rate": 2.7491368737460743e-05,
"loss": 1.9172,
"step": 63500
},
{
"epoch": 1.36,
"learning_rate": 2.7314135420432873e-05,
"loss": 1.9185,
"step": 64000
},
{
"epoch": 1.37,
"learning_rate": 2.713690210340501e-05,
"loss": 1.9299,
"step": 64500
},
{
"epoch": 1.38,
"learning_rate": 2.695966878637714e-05,
"loss": 1.9301,
"step": 65000
},
{
"epoch": 1.39,
"learning_rate": 2.678243546934927e-05,
"loss": 1.9189,
"step": 65500
},
{
"epoch": 1.4,
"learning_rate": 2.6605202152321402e-05,
"loss": 1.9303,
"step": 66000
},
{
"epoch": 1.41,
"learning_rate": 2.6427968835293536e-05,
"loss": 1.9151,
"step": 66500
},
{
"epoch": 1.42,
"learning_rate": 2.625073551826567e-05,
"loss": 1.9236,
"step": 67000
},
{
"epoch": 1.44,
"learning_rate": 2.60735022012378e-05,
"loss": 1.9198,
"step": 67500
},
{
"epoch": 1.45,
"learning_rate": 2.589626888420993e-05,
"loss": 1.91,
"step": 68000
},
{
"epoch": 1.46,
"learning_rate": 2.5719035567182065e-05,
"loss": 1.8999,
"step": 68500
},
{
"epoch": 1.47,
"learning_rate": 2.5541802250154195e-05,
"loss": 1.9141,
"step": 69000
},
{
"epoch": 1.48,
"learning_rate": 2.5364568933126325e-05,
"loss": 1.9012,
"step": 69500
},
{
"epoch": 1.49,
"learning_rate": 2.5187335616098455e-05,
"loss": 1.8999,
"step": 70000
},
{
"epoch": 1.5,
"learning_rate": 2.501010229907059e-05,
"loss": 1.8923,
"step": 70500
},
{
"epoch": 1.51,
"learning_rate": 2.483286898204272e-05,
"loss": 1.9068,
"step": 71000
},
{
"epoch": 1.52,
"learning_rate": 2.4655635665014854e-05,
"loss": 1.9013,
"step": 71500
},
{
"epoch": 1.53,
"learning_rate": 2.4478402347986984e-05,
"loss": 1.9018,
"step": 72000
},
{
"epoch": 1.54,
"learning_rate": 2.4301169030959117e-05,
"loss": 1.8892,
"step": 72500
},
{
"epoch": 1.55,
"learning_rate": 2.4123935713931247e-05,
"loss": 1.9016,
"step": 73000
},
{
"epoch": 1.56,
"learning_rate": 2.394670239690338e-05,
"loss": 1.9058,
"step": 73500
},
{
"epoch": 1.57,
"learning_rate": 2.376946907987551e-05,
"loss": 1.91,
"step": 74000
},
{
"epoch": 1.58,
"learning_rate": 2.3592235762847644e-05,
"loss": 1.889,
"step": 74500
},
{
"epoch": 1.6,
"learning_rate": 2.3415002445819777e-05,
"loss": 1.8988,
"step": 75000
},
{
"epoch": 1.61,
"learning_rate": 2.323776912879191e-05,
"loss": 1.8957,
"step": 75500
},
{
"epoch": 1.62,
"learning_rate": 2.306053581176404e-05,
"loss": 1.8938,
"step": 76000
},
{
"epoch": 1.63,
"learning_rate": 2.2883302494736173e-05,
"loss": 1.8979,
"step": 76500
},
{
"epoch": 1.64,
"learning_rate": 2.2706069177708303e-05,
"loss": 1.8898,
"step": 77000
},
{
"epoch": 1.65,
"learning_rate": 2.2528835860680436e-05,
"loss": 1.891,
"step": 77500
},
{
"epoch": 1.66,
"learning_rate": 2.2351602543652566e-05,
"loss": 1.8858,
"step": 78000
},
{
"epoch": 1.67,
"learning_rate": 2.21743692266247e-05,
"loss": 1.8749,
"step": 78500
},
{
"epoch": 1.68,
"learning_rate": 2.199713590959683e-05,
"loss": 1.8833,
"step": 79000
},
{
"epoch": 1.69,
"learning_rate": 2.1819902592568962e-05,
"loss": 1.8788,
"step": 79500
},
{
"epoch": 1.7,
"learning_rate": 2.1642669275541096e-05,
"loss": 1.8809,
"step": 80000
},
{
"epoch": 1.71,
"learning_rate": 2.146543595851323e-05,
"loss": 1.8796,
"step": 80500
},
{
"epoch": 1.72,
"learning_rate": 2.128820264148536e-05,
"loss": 1.8863,
"step": 81000
},
{
"epoch": 1.73,
"learning_rate": 2.111096932445749e-05,
"loss": 1.8814,
"step": 81500
},
{
"epoch": 1.74,
"learning_rate": 2.0933736007429622e-05,
"loss": 1.8814,
"step": 82000
},
{
"epoch": 1.75,
"learning_rate": 2.075650269040175e-05,
"loss": 1.8654,
"step": 82500
},
{
"epoch": 1.77,
"learning_rate": 2.0579269373373885e-05,
"loss": 1.8806,
"step": 83000
},
{
"epoch": 1.78,
"learning_rate": 2.0402036056346015e-05,
"loss": 1.8659,
"step": 83500
},
{
"epoch": 1.79,
"learning_rate": 2.0224802739318148e-05,
"loss": 1.8837,
"step": 84000
},
{
"epoch": 1.8,
"learning_rate": 2.004756942229028e-05,
"loss": 1.8608,
"step": 84500
},
{
"epoch": 1.81,
"learning_rate": 1.9870336105262414e-05,
"loss": 1.8754,
"step": 85000
},
{
"epoch": 1.82,
"learning_rate": 1.9693102788234544e-05,
"loss": 1.8703,
"step": 85500
},
{
"epoch": 1.83,
"learning_rate": 1.9515869471206677e-05,
"loss": 1.8642,
"step": 86000
},
{
"epoch": 1.84,
"learning_rate": 1.9338636154178807e-05,
"loss": 1.8701,
"step": 86500
},
{
"epoch": 1.85,
"learning_rate": 1.916140283715094e-05,
"loss": 1.863,
"step": 87000
},
{
"epoch": 1.86,
"learning_rate": 1.898416952012307e-05,
"loss": 1.8672,
"step": 87500
},
{
"epoch": 1.87,
"learning_rate": 1.8806936203095204e-05,
"loss": 1.8638,
"step": 88000
},
{
"epoch": 1.88,
"learning_rate": 1.8629702886067333e-05,
"loss": 1.8709,
"step": 88500
},
{
"epoch": 1.89,
"learning_rate": 1.845246956903947e-05,
"loss": 1.8496,
"step": 89000
},
{
"epoch": 1.9,
"learning_rate": 1.82752362520116e-05,
"loss": 1.8601,
"step": 89500
},
{
"epoch": 1.91,
"learning_rate": 1.8098002934983733e-05,
"loss": 1.8628,
"step": 90000
},
{
"epoch": 1.92,
"learning_rate": 1.7920769617955863e-05,
"loss": 1.8551,
"step": 90500
},
{
"epoch": 1.94,
"learning_rate": 1.7743536300927993e-05,
"loss": 1.8493,
"step": 91000
},
{
"epoch": 1.95,
"learning_rate": 1.7566302983900126e-05,
"loss": 1.866,
"step": 91500
},
{
"epoch": 1.96,
"learning_rate": 1.7389069666872256e-05,
"loss": 1.8527,
"step": 92000
},
{
"epoch": 1.97,
"learning_rate": 1.721183634984439e-05,
"loss": 1.8541,
"step": 92500
},
{
"epoch": 1.98,
"learning_rate": 1.7034603032816522e-05,
"loss": 1.8483,
"step": 93000
},
{
"epoch": 1.99,
"learning_rate": 1.6857369715788656e-05,
"loss": 1.8555,
"step": 93500
},
{
"epoch": 2.0,
"learning_rate": 1.6680136398760785e-05,
"loss": 1.8524,
"step": 94000
},
{
"epoch": 2.01,
"learning_rate": 1.650290308173292e-05,
"loss": 1.8502,
"step": 94500
},
{
"epoch": 2.02,
"learning_rate": 1.632566976470505e-05,
"loss": 1.8332,
"step": 95000
},
{
"epoch": 2.03,
"learning_rate": 1.614843644767718e-05,
"loss": 1.8336,
"step": 95500
},
{
"epoch": 2.04,
"learning_rate": 1.597120313064931e-05,
"loss": 1.8362,
"step": 96000
},
{
"epoch": 2.05,
"learning_rate": 1.5793969813621445e-05,
"loss": 1.845,
"step": 96500
},
{
"epoch": 2.06,
"learning_rate": 1.5616736496593575e-05,
"loss": 1.8313,
"step": 97000
},
{
"epoch": 2.07,
"learning_rate": 1.5439503179565708e-05,
"loss": 1.8447,
"step": 97500
},
{
"epoch": 2.08,
"learning_rate": 1.526226986253784e-05,
"loss": 1.8309,
"step": 98000
},
{
"epoch": 2.09,
"learning_rate": 1.5085036545509973e-05,
"loss": 1.8321,
"step": 98500
},
{
"epoch": 2.11,
"learning_rate": 1.4907803228482104e-05,
"loss": 1.8326,
"step": 99000
},
{
"epoch": 2.12,
"learning_rate": 1.4730569911454237e-05,
"loss": 1.8386,
"step": 99500
},
{
"epoch": 2.13,
"learning_rate": 1.4553336594426367e-05,
"loss": 1.8316,
"step": 100000
},
{
"epoch": 2.14,
"learning_rate": 1.43761032773985e-05,
"loss": 1.8337,
"step": 100500
},
{
"epoch": 2.15,
"learning_rate": 1.419886996037063e-05,
"loss": 1.8395,
"step": 101000
},
{
"epoch": 2.16,
"learning_rate": 1.4021636643342762e-05,
"loss": 1.8345,
"step": 101500
},
{
"epoch": 2.17,
"learning_rate": 1.3844403326314895e-05,
"loss": 1.8324,
"step": 102000
},
{
"epoch": 2.18,
"learning_rate": 1.3667170009287025e-05,
"loss": 1.8222,
"step": 102500
},
{
"epoch": 2.19,
"learning_rate": 1.3489936692259158e-05,
"loss": 1.8077,
"step": 103000
},
{
"epoch": 2.2,
"learning_rate": 1.331270337523129e-05,
"loss": 1.8202,
"step": 103500
},
{
"epoch": 2.21,
"learning_rate": 1.3135470058203423e-05,
"loss": 1.8052,
"step": 104000
},
{
"epoch": 2.22,
"learning_rate": 1.2958236741175553e-05,
"loss": 1.8069,
"step": 104500
},
{
"epoch": 2.23,
"learning_rate": 1.2781003424147686e-05,
"loss": 1.8158,
"step": 105000
},
{
"epoch": 2.24,
"learning_rate": 1.2603770107119817e-05,
"loss": 1.8183,
"step": 105500
},
{
"epoch": 2.25,
"learning_rate": 1.242653679009195e-05,
"loss": 1.8116,
"step": 106000
},
{
"epoch": 2.27,
"learning_rate": 1.224930347306408e-05,
"loss": 1.8095,
"step": 106500
},
{
"epoch": 2.28,
"learning_rate": 1.2072070156036212e-05,
"loss": 1.8207,
"step": 107000
},
{
"epoch": 2.29,
"learning_rate": 1.1894836839008344e-05,
"loss": 1.8184,
"step": 107500
},
{
"epoch": 2.3,
"learning_rate": 1.1717603521980477e-05,
"loss": 1.8278,
"step": 108000
},
{
"epoch": 2.31,
"learning_rate": 1.1540370204952608e-05,
"loss": 1.8164,
"step": 108500
},
{
"epoch": 2.32,
"learning_rate": 1.136313688792474e-05,
"loss": 1.8244,
"step": 109000
},
{
"epoch": 2.33,
"learning_rate": 1.1185903570896871e-05,
"loss": 1.8199,
"step": 109500
},
{
"epoch": 2.34,
"learning_rate": 1.1008670253869003e-05,
"loss": 1.8252,
"step": 110000
},
{
"epoch": 2.35,
"learning_rate": 1.0831436936841136e-05,
"loss": 1.808,
"step": 110500
},
{
"epoch": 2.36,
"learning_rate": 1.0654203619813268e-05,
"loss": 1.8097,
"step": 111000
},
{
"epoch": 2.37,
"learning_rate": 1.04769703027854e-05,
"loss": 1.8049,
"step": 111500
},
{
"epoch": 2.38,
"learning_rate": 1.029973698575753e-05,
"loss": 1.8081,
"step": 112000
},
{
"epoch": 2.39,
"learning_rate": 1.0122503668729662e-05,
"loss": 1.8134,
"step": 112500
},
{
"epoch": 2.4,
"learning_rate": 9.945270351701796e-06,
"loss": 1.7976,
"step": 113000
},
{
"epoch": 2.41,
"learning_rate": 9.768037034673927e-06,
"loss": 1.8036,
"step": 113500
},
{
"epoch": 2.42,
"learning_rate": 9.590803717646059e-06,
"loss": 1.7979,
"step": 114000
},
{
"epoch": 2.44,
"learning_rate": 9.41357040061819e-06,
"loss": 1.8165,
"step": 114500
},
{
"epoch": 2.45,
"learning_rate": 9.236337083590323e-06,
"loss": 1.8069,
"step": 115000
},
{
"epoch": 2.46,
"learning_rate": 9.059103766562455e-06,
"loss": 1.7922,
"step": 115500
},
{
"epoch": 2.47,
"learning_rate": 8.881870449534587e-06,
"loss": 1.7981,
"step": 116000
},
{
"epoch": 2.48,
"learning_rate": 8.704637132506716e-06,
"loss": 1.7998,
"step": 116500
},
{
"epoch": 2.49,
"learning_rate": 8.52740381547885e-06,
"loss": 1.7938,
"step": 117000
},
{
"epoch": 2.5,
"learning_rate": 8.350170498450981e-06,
"loss": 1.8041,
"step": 117500
},
{
"epoch": 2.51,
"learning_rate": 8.172937181423113e-06,
"loss": 1.8037,
"step": 118000
},
{
"epoch": 2.52,
"learning_rate": 7.995703864395244e-06,
"loss": 1.7945,
"step": 118500
},
{
"epoch": 2.53,
"learning_rate": 7.818470547367376e-06,
"loss": 1.8138,
"step": 119000
},
{
"epoch": 2.54,
"learning_rate": 7.641237230339509e-06,
"loss": 1.792,
"step": 119500
},
{
"epoch": 2.55,
"learning_rate": 7.4640039133116405e-06,
"loss": 1.7963,
"step": 120000
},
{
"epoch": 2.56,
"learning_rate": 7.286770596283772e-06,
"loss": 1.7906,
"step": 120500
},
{
"epoch": 2.57,
"learning_rate": 7.1095372792559036e-06,
"loss": 1.7933,
"step": 121000
},
{
"epoch": 2.58,
"learning_rate": 6.932303962228036e-06,
"loss": 1.7877,
"step": 121500
},
{
"epoch": 2.59,
"learning_rate": 6.7550706452001675e-06,
"loss": 1.7999,
"step": 122000
},
{
"epoch": 2.61,
"learning_rate": 6.5778373281723e-06,
"loss": 1.7839,
"step": 122500
},
{
"epoch": 2.62,
"learning_rate": 6.400604011144431e-06,
"loss": 1.7907,
"step": 123000
},
{
"epoch": 2.63,
"learning_rate": 6.223370694116564e-06,
"loss": 1.7869,
"step": 123500
},
{
"epoch": 2.64,
"learning_rate": 6.0461373770886945e-06,
"loss": 1.7736,
"step": 124000
},
{
"epoch": 2.65,
"learning_rate": 5.868904060060827e-06,
"loss": 1.7825,
"step": 124500
},
{
"epoch": 2.66,
"learning_rate": 5.691670743032958e-06,
"loss": 1.7831,
"step": 125000
},
{
"epoch": 2.67,
"learning_rate": 5.51443742600509e-06,
"loss": 1.7831,
"step": 125500
},
{
"epoch": 2.68,
"learning_rate": 5.337204108977222e-06,
"loss": 1.7848,
"step": 126000
},
{
"epoch": 2.69,
"learning_rate": 5.159970791949354e-06,
"loss": 1.793,
"step": 126500
},
{
"epoch": 2.7,
"learning_rate": 4.982737474921486e-06,
"loss": 1.7887,
"step": 127000
},
{
"epoch": 2.71,
"learning_rate": 4.805504157893618e-06,
"loss": 1.7824,
"step": 127500
},
{
"epoch": 2.72,
"learning_rate": 4.62827084086575e-06,
"loss": 1.7885,
"step": 128000
},
{
"epoch": 2.73,
"learning_rate": 4.451037523837882e-06,
"loss": 1.7892,
"step": 128500
},
{
"epoch": 2.74,
"learning_rate": 4.273804206810013e-06,
"loss": 1.7832,
"step": 129000
},
{
"epoch": 2.75,
"learning_rate": 4.096570889782145e-06,
"loss": 1.7705,
"step": 129500
},
{
"epoch": 2.76,
"learning_rate": 3.919337572754276e-06,
"loss": 1.7775,
"step": 130000
},
{
"epoch": 2.78,
"learning_rate": 3.7421042557264087e-06,
"loss": 1.7737,
"step": 130500
},
{
"epoch": 2.79,
"learning_rate": 3.5648709386985406e-06,
"loss": 1.7858,
"step": 131000
},
{
"epoch": 2.8,
"learning_rate": 3.387637621670672e-06,
"loss": 1.784,
"step": 131500
},
{
"epoch": 2.81,
"learning_rate": 3.210404304642804e-06,
"loss": 1.7766,
"step": 132000
},
{
"epoch": 2.82,
"learning_rate": 3.033170987614936e-06,
"loss": 1.7806,
"step": 132500
},
{
"epoch": 2.83,
"learning_rate": 2.8559376705870676e-06,
"loss": 1.7574,
"step": 133000
},
{
"epoch": 2.84,
"learning_rate": 2.6787043535591996e-06,
"loss": 1.7697,
"step": 133500
},
{
"epoch": 2.85,
"learning_rate": 2.5014710365313316e-06,
"loss": 1.7834,
"step": 134000
},
{
"epoch": 2.86,
"learning_rate": 2.3242377195034635e-06,
"loss": 1.7644,
"step": 134500
},
{
"epoch": 2.87,
"learning_rate": 2.147004402475595e-06,
"loss": 1.7794,
"step": 135000
},
{
"epoch": 2.88,
"learning_rate": 1.9697710854477266e-06,
"loss": 1.7657,
"step": 135500
},
{
"epoch": 2.89,
"learning_rate": 1.7925377684198588e-06,
"loss": 1.7786,
"step": 136000
},
{
"epoch": 2.9,
"learning_rate": 1.6153044513919905e-06,
"loss": 1.7756,
"step": 136500
},
{
"epoch": 2.91,
"learning_rate": 1.4380711343641223e-06,
"loss": 1.7742,
"step": 137000
},
{
"epoch": 2.92,
"learning_rate": 1.2608378173362542e-06,
"loss": 1.7711,
"step": 137500
},
{
"epoch": 2.93,
"learning_rate": 1.0836045003083862e-06,
"loss": 1.7596,
"step": 138000
},
{
"epoch": 2.95,
"learning_rate": 9.063711832805178e-07,
"loss": 1.7777,
"step": 138500
},
{
"epoch": 2.96,
"learning_rate": 7.291378662526497e-07,
"loss": 1.7767,
"step": 139000
},
{
"epoch": 2.97,
"learning_rate": 5.519045492247815e-07,
"loss": 1.7738,
"step": 139500
},
{
"epoch": 2.98,
"learning_rate": 3.746712321969133e-07,
"loss": 1.787,
"step": 140000
},
{
"epoch": 2.99,
"learning_rate": 1.9743791516904515e-07,
"loss": 1.7725,
"step": 140500
},
{
"epoch": 3.0,
"learning_rate": 2.0204598141176972e-08,
"loss": 1.7788,
"step": 141000
},
{
"epoch": 3.0,
"step": 141057,
"total_flos": 3.722159267218719e+17,
"train_loss": 1.9451130962921488,
"train_runtime": 85182.9847,
"train_samples_per_second": 16.559,
"train_steps_per_second": 1.656
}
],
"max_steps": 141057,
"num_train_epochs": 3,
"total_flos": 3.722159267218719e+17,
"trial_name": null,
"trial_params": null
}