hansken_human_hql / trainer_state.json
{
"best_metric": 0.23624150454998016,
"best_model_checkpoint": "data/hansken_human_hql/checkpoint-511",
"epoch": 9.995110024449877,
"eval_steps": 500,
"global_step": 1022,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009779951100244499,
"grad_norm": 1.0709587335586548,
"learning_rate": 9.80392156862745e-07,
"loss": 1.4707,
"step": 1
},
{
"epoch": 0.0488997555012225,
"grad_norm": 1.1860318183898926,
"learning_rate": 4.901960784313726e-06,
"loss": 1.4227,
"step": 5
},
{
"epoch": 0.097799511002445,
"grad_norm": 1.145372986793518,
"learning_rate": 9.803921568627451e-06,
"loss": 1.444,
"step": 10
},
{
"epoch": 0.1466992665036675,
"grad_norm": 1.0051456689834595,
"learning_rate": 1.4705882352941177e-05,
"loss": 1.3414,
"step": 15
},
{
"epoch": 0.19559902200489,
"grad_norm": 0.5133008360862732,
"learning_rate": 1.9607843137254903e-05,
"loss": 1.2116,
"step": 20
},
{
"epoch": 0.24449877750611246,
"grad_norm": 0.41832494735717773,
"learning_rate": 2.4509803921568626e-05,
"loss": 1.1424,
"step": 25
},
{
"epoch": 0.293398533007335,
"grad_norm": 0.41829216480255127,
"learning_rate": 2.9411764705882354e-05,
"loss": 1.143,
"step": 30
},
{
"epoch": 0.3422982885085575,
"grad_norm": 0.35495856404304504,
"learning_rate": 3.431372549019608e-05,
"loss": 1.0472,
"step": 35
},
{
"epoch": 0.39119804400978,
"grad_norm": 0.40399229526519775,
"learning_rate": 3.9215686274509805e-05,
"loss": 0.9879,
"step": 40
},
{
"epoch": 0.4400977995110024,
"grad_norm": 0.31430941820144653,
"learning_rate": 4.411764705882353e-05,
"loss": 0.9467,
"step": 45
},
{
"epoch": 0.4889975550122249,
"grad_norm": 0.29712405800819397,
"learning_rate": 4.901960784313725e-05,
"loss": 0.885,
"step": 50
},
{
"epoch": 0.5378973105134475,
"grad_norm": 0.40078112483024597,
"learning_rate": 5.392156862745098e-05,
"loss": 0.7973,
"step": 55
},
{
"epoch": 0.58679706601467,
"grad_norm": 0.34199750423431396,
"learning_rate": 5.882352941176471e-05,
"loss": 0.776,
"step": 60
},
{
"epoch": 0.6356968215158925,
"grad_norm": 0.4243955910205841,
"learning_rate": 6.372549019607843e-05,
"loss": 0.651,
"step": 65
},
{
"epoch": 0.684596577017115,
"grad_norm": 0.30432572960853577,
"learning_rate": 6.862745098039216e-05,
"loss": 0.5769,
"step": 70
},
{
"epoch": 0.7334963325183375,
"grad_norm": 0.27279356122016907,
"learning_rate": 7.352941176470589e-05,
"loss": 0.5436,
"step": 75
},
{
"epoch": 0.78239608801956,
"grad_norm": 0.2576221823692322,
"learning_rate": 7.843137254901961e-05,
"loss": 0.501,
"step": 80
},
{
"epoch": 0.8312958435207825,
"grad_norm": 0.22290439903736115,
"learning_rate": 8.333333333333334e-05,
"loss": 0.4915,
"step": 85
},
{
"epoch": 0.8801955990220048,
"grad_norm": 0.21740856766700745,
"learning_rate": 8.823529411764706e-05,
"loss": 0.4487,
"step": 90
},
{
"epoch": 0.9290953545232273,
"grad_norm": 0.21560043096542358,
"learning_rate": 9.313725490196079e-05,
"loss": 0.4351,
"step": 95
},
{
"epoch": 0.9779951100244498,
"grad_norm": 0.2607389986515045,
"learning_rate": 9.80392156862745e-05,
"loss": 0.4508,
"step": 100
},
{
"epoch": 0.9975550122249389,
"eval_loss": 0.44326770305633545,
"eval_runtime": 398.4802,
"eval_samples_per_second": 1.029,
"eval_steps_per_second": 1.029,
"step": 102
},
{
"epoch": 1.0268948655256724,
"grad_norm": 0.21888603270053864,
"learning_rate": 0.00010294117647058823,
"loss": 0.3968,
"step": 105
},
{
"epoch": 1.075794621026895,
"grad_norm": 0.21742066740989685,
"learning_rate": 0.00010784313725490196,
"loss": 0.3785,
"step": 110
},
{
"epoch": 1.1246943765281174,
"grad_norm": 0.2523965537548065,
"learning_rate": 0.0001127450980392157,
"loss": 0.4034,
"step": 115
},
{
"epoch": 1.17359413202934,
"grad_norm": 0.2155005782842636,
"learning_rate": 0.00011764705882352942,
"loss": 0.3766,
"step": 120
},
{
"epoch": 1.2224938875305624,
"grad_norm": 0.25576308369636536,
"learning_rate": 0.00012254901960784316,
"loss": 0.364,
"step": 125
},
{
"epoch": 1.271393643031785,
"grad_norm": 0.2288295179605484,
"learning_rate": 0.00012745098039215687,
"loss": 0.3451,
"step": 130
},
{
"epoch": 1.3202933985330074,
"grad_norm": 0.2045079469680786,
"learning_rate": 0.0001323529411764706,
"loss": 0.3425,
"step": 135
},
{
"epoch": 1.36919315403423,
"grad_norm": 0.2297014445066452,
"learning_rate": 0.0001372549019607843,
"loss": 0.3658,
"step": 140
},
{
"epoch": 1.4180929095354524,
"grad_norm": 0.2170581817626953,
"learning_rate": 0.00014215686274509804,
"loss": 0.3482,
"step": 145
},
{
"epoch": 1.466992665036675,
"grad_norm": 0.2250969409942627,
"learning_rate": 0.00014705882352941178,
"loss": 0.3353,
"step": 150
},
{
"epoch": 1.5158924205378974,
"grad_norm": 0.23191578686237335,
"learning_rate": 0.00015196078431372549,
"loss": 0.3271,
"step": 155
},
{
"epoch": 1.56479217603912,
"grad_norm": 0.2477528601884842,
"learning_rate": 0.00015686274509803922,
"loss": 0.3549,
"step": 160
},
{
"epoch": 1.6136919315403424,
"grad_norm": 0.20846064388751984,
"learning_rate": 0.00016176470588235295,
"loss": 0.3171,
"step": 165
},
{
"epoch": 1.662591687041565,
"grad_norm": 0.21829602122306824,
"learning_rate": 0.0001666666666666667,
"loss": 0.3642,
"step": 170
},
{
"epoch": 1.7114914425427874,
"grad_norm": 0.22842282056808472,
"learning_rate": 0.0001715686274509804,
"loss": 0.3116,
"step": 175
},
{
"epoch": 1.76039119804401,
"grad_norm": 0.24106037616729736,
"learning_rate": 0.00017647058823529413,
"loss": 0.3066,
"step": 180
},
{
"epoch": 1.8092909535452324,
"grad_norm": 0.25696486234664917,
"learning_rate": 0.00018137254901960786,
"loss": 0.3053,
"step": 185
},
{
"epoch": 1.858190709046455,
"grad_norm": 0.22010771930217743,
"learning_rate": 0.00018627450980392157,
"loss": 0.3233,
"step": 190
},
{
"epoch": 1.9070904645476774,
"grad_norm": 0.2373352199792862,
"learning_rate": 0.0001911764705882353,
"loss": 0.3102,
"step": 195
},
{
"epoch": 1.9559902200488999,
"grad_norm": 0.21177123486995697,
"learning_rate": 0.000196078431372549,
"loss": 0.302,
"step": 200
},
{
"epoch": 1.9951100244498776,
"eval_loss": 0.3139691650867462,
"eval_runtime": 387.4792,
"eval_samples_per_second": 1.058,
"eval_steps_per_second": 1.058,
"step": 204
},
{
"epoch": 2.0048899755501224,
"grad_norm": 0.2193712592124939,
"learning_rate": 0.00019999985360565867,
"loss": 0.2813,
"step": 205
},
{
"epoch": 2.053789731051345,
"grad_norm": 0.3371932804584503,
"learning_rate": 0.00019999472984871732,
"loss": 0.2844,
"step": 210
},
{
"epoch": 2.1026894865525674,
"grad_norm": 0.23578821122646332,
"learning_rate": 0.00019998228680332932,
"loss": 0.263,
"step": 215
},
{
"epoch": 2.15158924205379,
"grad_norm": 0.27435311675071716,
"learning_rate": 0.00019996252538028507,
"loss": 0.2752,
"step": 220
},
{
"epoch": 2.2004889975550124,
"grad_norm": 0.24362725019454956,
"learning_rate": 0.00019993544702605638,
"loss": 0.2572,
"step": 225
},
{
"epoch": 2.249388753056235,
"grad_norm": 0.24360118806362152,
"learning_rate": 0.0001999010537226905,
"loss": 0.3191,
"step": 230
},
{
"epoch": 2.2982885085574574,
"grad_norm": 0.2612737715244293,
"learning_rate": 0.0001998593479876652,
"loss": 0.2506,
"step": 235
},
{
"epoch": 2.34718826405868,
"grad_norm": 0.21556636691093445,
"learning_rate": 0.00019981033287370443,
"loss": 0.2416,
"step": 240
},
{
"epoch": 2.3960880195599024,
"grad_norm": 0.22406277060508728,
"learning_rate": 0.00019975401196855482,
"loss": 0.273,
"step": 245
},
{
"epoch": 2.444987775061125,
"grad_norm": 0.3020350933074951,
"learning_rate": 0.00019969038939472315,
"loss": 0.2457,
"step": 250
},
{
"epoch": 2.4938875305623474,
"grad_norm": 0.20698243379592896,
"learning_rate": 0.00019961946980917456,
"loss": 0.2569,
"step": 255
},
{
"epoch": 2.54278728606357,
"grad_norm": 0.4294751286506653,
"learning_rate": 0.00019954125840299165,
"loss": 0.2246,
"step": 260
},
{
"epoch": 2.591687041564792,
"grad_norm": 0.37185847759246826,
"learning_rate": 0.00019945576090099452,
"loss": 0.229,
"step": 265
},
{
"epoch": 2.640586797066015,
"grad_norm": 0.2863105237483978,
"learning_rate": 0.00019936298356132176,
"loss": 0.2338,
"step": 270
},
{
"epoch": 2.689486552567237,
"grad_norm": 0.19301028549671173,
"learning_rate": 0.00019926293317497245,
"loss": 0.2167,
"step": 275
},
{
"epoch": 2.73838630806846,
"grad_norm": 0.22075964510440826,
"learning_rate": 0.00019915561706530883,
"loss": 0.2367,
"step": 280
},
{
"epoch": 2.787286063569682,
"grad_norm": 0.22829142212867737,
"learning_rate": 0.0001990410430875205,
"loss": 0.245,
"step": 285
},
{
"epoch": 2.836185819070905,
"grad_norm": 0.1982724666595459,
"learning_rate": 0.00019891921962804943,
"loss": 0.217,
"step": 290
},
{
"epoch": 2.885085574572127,
"grad_norm": 0.23672354221343994,
"learning_rate": 0.00019879015560397587,
"loss": 0.2298,
"step": 295
},
{
"epoch": 2.93398533007335,
"grad_norm": 0.21391943097114563,
"learning_rate": 0.00019865386046236596,
"loss": 0.2326,
"step": 300
},
{
"epoch": 2.982885085574572,
"grad_norm": 0.19821615517139435,
"learning_rate": 0.00019851034417958,
"loss": 0.2692,
"step": 305
},
{
"epoch": 2.9926650366748166,
"eval_loss": 0.2616053521633148,
"eval_runtime": 387.9813,
"eval_samples_per_second": 1.057,
"eval_steps_per_second": 1.057,
"step": 306
},
{
"epoch": 3.031784841075795,
"grad_norm": 0.22108572721481323,
"learning_rate": 0.0001983596172605423,
"loss": 0.2104,
"step": 310
},
{
"epoch": 3.0806845965770173,
"grad_norm": 0.24487629532814026,
"learning_rate": 0.00019820169073797228,
"loss": 0.1942,
"step": 315
},
{
"epoch": 3.12958435207824,
"grad_norm": 0.2110164612531662,
"learning_rate": 0.0001980365761715769,
"loss": 0.1833,
"step": 320
},
{
"epoch": 3.178484107579462,
"grad_norm": 0.20861805975437164,
"learning_rate": 0.0001978642856472045,
"loss": 0.1981,
"step": 325
},
{
"epoch": 3.227383863080685,
"grad_norm": 0.1969948559999466,
"learning_rate": 0.0001976848317759601,
"loss": 0.1868,
"step": 330
},
{
"epoch": 3.276283618581907,
"grad_norm": 0.19443638622760773,
"learning_rate": 0.0001974982276932824,
"loss": 0.1902,
"step": 335
},
{
"epoch": 3.32518337408313,
"grad_norm": 0.30058181285858154,
"learning_rate": 0.00019730448705798239,
"loss": 0.2241,
"step": 340
},
{
"epoch": 3.374083129584352,
"grad_norm": 0.21647138893604279,
"learning_rate": 0.00019710362405124334,
"loss": 0.1838,
"step": 345
},
{
"epoch": 3.422982885085575,
"grad_norm": 0.19676022231578827,
"learning_rate": 0.00019689565337558288,
"loss": 0.1961,
"step": 350
},
{
"epoch": 3.471882640586797,
"grad_norm": 0.2371009737253189,
"learning_rate": 0.00019668059025377703,
"loss": 0.2052,
"step": 355
},
{
"epoch": 3.52078239608802,
"grad_norm": 0.21014133095741272,
"learning_rate": 0.00019645845042774553,
"loss": 0.1987,
"step": 360
},
{
"epoch": 3.569682151589242,
"grad_norm": 0.2178957760334015,
"learning_rate": 0.00019622925015739997,
"loss": 0.1903,
"step": 365
},
{
"epoch": 3.618581907090465,
"grad_norm": 0.21497862040996552,
"learning_rate": 0.0001959930062194534,
"loss": 0.2,
"step": 370
},
{
"epoch": 3.667481662591687,
"grad_norm": 0.22582682967185974,
"learning_rate": 0.00019574973590619243,
"loss": 0.1868,
"step": 375
},
{
"epoch": 3.71638141809291,
"grad_norm": 0.1827058047056198,
"learning_rate": 0.00019549945702421144,
"loss": 0.2172,
"step": 380
},
{
"epoch": 3.765281173594132,
"grad_norm": 0.19827835261821747,
"learning_rate": 0.00019524218789310912,
"loss": 0.1785,
"step": 385
},
{
"epoch": 3.8141809290953548,
"grad_norm": 0.2121572494506836,
"learning_rate": 0.0001949779473441478,
"loss": 0.1795,
"step": 390
},
{
"epoch": 3.863080684596577,
"grad_norm": 0.20197761058807373,
"learning_rate": 0.0001947067547188747,
"loss": 0.19,
"step": 395
},
{
"epoch": 3.9119804400977998,
"grad_norm": 0.21854069828987122,
"learning_rate": 0.00019442862986770646,
"loss": 0.1886,
"step": 400
},
{
"epoch": 3.960880195599022,
"grad_norm": 0.20974552631378174,
"learning_rate": 0.0001941435931484761,
"loss": 0.177,
"step": 405
},
{
"epoch": 4.0,
"eval_loss": 0.24309584498405457,
"eval_runtime": 399.7773,
"eval_samples_per_second": 1.026,
"eval_steps_per_second": 1.026,
"step": 409
},
{
"epoch": 4.009779951100245,
"grad_norm": 0.19041913747787476,
"learning_rate": 0.0001938516654249428,
"loss": 0.1709,
"step": 410
},
{
"epoch": 4.058679706601467,
"grad_norm": 0.22610776126384735,
"learning_rate": 0.00019355286806526493,
"loss": 0.158,
"step": 415
},
{
"epoch": 4.10757946210269,
"grad_norm": 0.2044234424829483,
"learning_rate": 0.00019324722294043558,
"loss": 0.1522,
"step": 420
},
{
"epoch": 4.156479217603912,
"grad_norm": 0.2402704805135727,
"learning_rate": 0.00019293475242268223,
"loss": 0.1509,
"step": 425
},
{
"epoch": 4.205378973105135,
"grad_norm": 0.20224688947200775,
"learning_rate": 0.0001926154793838288,
"loss": 0.1565,
"step": 430
},
{
"epoch": 4.254278728606357,
"grad_norm": 0.21887710690498352,
"learning_rate": 0.00019228942719362143,
"loss": 0.1551,
"step": 435
},
{
"epoch": 4.30317848410758,
"grad_norm": 0.20886527001857758,
"learning_rate": 0.00019195661971801827,
"loss": 0.1568,
"step": 440
},
{
"epoch": 4.352078239608802,
"grad_norm": 0.21612216532230377,
"learning_rate": 0.00019161708131744222,
"loss": 0.1516,
"step": 445
},
{
"epoch": 4.400977995110025,
"grad_norm": 0.20036669075489044,
"learning_rate": 0.00019127083684499806,
"loss": 0.1529,
"step": 450
},
{
"epoch": 4.449877750611247,
"grad_norm": 0.3197900950908661,
"learning_rate": 0.00019091791164465305,
"loss": 0.1854,
"step": 455
},
{
"epoch": 4.49877750611247,
"grad_norm": 0.18851010501384735,
"learning_rate": 0.00019055833154938207,
"loss": 0.1574,
"step": 460
},
{
"epoch": 4.547677261613692,
"grad_norm": 0.214978888630867,
"learning_rate": 0.00019019212287927663,
"loss": 0.1555,
"step": 465
},
{
"epoch": 4.596577017114915,
"grad_norm": 0.21155217289924622,
"learning_rate": 0.00018981931243961824,
"loss": 0.176,
"step": 470
},
{
"epoch": 4.645476772616137,
"grad_norm": 0.18137674033641815,
"learning_rate": 0.00018943992751891653,
"loss": 0.1575,
"step": 475
},
{
"epoch": 4.69437652811736,
"grad_norm": 0.24663567543029785,
"learning_rate": 0.00018905399588691163,
"loss": 0.1568,
"step": 480
},
{
"epoch": 4.743276283618582,
"grad_norm": 0.19319510459899902,
"learning_rate": 0.0001886615457925417,
"loss": 0.1547,
"step": 485
},
{
"epoch": 4.792176039119805,
"grad_norm": 0.18611547350883484,
"learning_rate": 0.00018826260596187505,
"loss": 0.1755,
"step": 490
},
{
"epoch": 4.841075794621027,
"grad_norm": 0.47814473509788513,
"learning_rate": 0.00018785720559600752,
"loss": 0.1647,
"step": 495
},
{
"epoch": 4.88997555012225,
"grad_norm": 0.19350242614746094,
"learning_rate": 0.00018744537436892516,
"loss": 0.155,
"step": 500
},
{
"epoch": 4.938875305623472,
"grad_norm": 0.19956329464912415,
"learning_rate": 0.00018702714242533204,
"loss": 0.156,
"step": 505
},
{
"epoch": 4.987775061124695,
"grad_norm": 0.20709875226020813,
"learning_rate": 0.00018660254037844388,
"loss": 0.1616,
"step": 510
},
{
"epoch": 4.997555012224939,
"eval_loss": 0.23624150454998016,
"eval_runtime": 387.7068,
"eval_samples_per_second": 1.058,
"eval_steps_per_second": 1.058,
"step": 511
},
{
"epoch": 5.036674816625917,
"grad_norm": 0.22790652513504028,
"learning_rate": 0.00018617159930774715,
"loss": 0.1377,
"step": 515
},
{
"epoch": 5.08557457212714,
"grad_norm": 0.21796418726444244,
"learning_rate": 0.00018573435075672424,
"loss": 0.1326,
"step": 520
},
{
"epoch": 5.134474327628362,
"grad_norm": 0.19105204939842224,
"learning_rate": 0.00018529082673054457,
"loss": 0.1303,
"step": 525
},
{
"epoch": 5.183374083129585,
"grad_norm": 0.2682870328426361,
"learning_rate": 0.00018484105969372182,
"loss": 0.1316,
"step": 530
},
{
"epoch": 5.232273838630807,
"grad_norm": 0.18370023369789124,
"learning_rate": 0.00018438508256773785,
"loss": 0.1323,
"step": 535
},
{
"epoch": 5.28117359413203,
"grad_norm": 0.24072639644145966,
"learning_rate": 0.00018392292872863267,
"loss": 0.1332,
"step": 540
},
{
"epoch": 5.330073349633252,
"grad_norm": 0.19523735344409943,
"learning_rate": 0.00018345463200456164,
"loss": 0.1344,
"step": 545
},
{
"epoch": 5.378973105134475,
"grad_norm": 0.24865508079528809,
"learning_rate": 0.0001829802266733193,
"loss": 0.1359,
"step": 550
},
{
"epoch": 5.427872860635697,
"grad_norm": 0.2039840966463089,
"learning_rate": 0.00018249974745983023,
"loss": 0.1337,
"step": 555
},
{
"epoch": 5.47677261613692,
"grad_norm": 0.20024679601192474,
"learning_rate": 0.00018201322953360758,
"loss": 0.154,
"step": 560
},
{
"epoch": 5.525672371638142,
"grad_norm": 0.1976476013660431,
"learning_rate": 0.0001815207085061784,
"loss": 0.1353,
"step": 565
},
{
"epoch": 5.574572127139365,
"grad_norm": 0.1974327266216278,
"learning_rate": 0.00018102222042847737,
"loss": 0.1373,
"step": 570
},
{
"epoch": 5.623471882640587,
"grad_norm": 0.27005520462989807,
"learning_rate": 0.00018051780178820765,
"loss": 0.1437,
"step": 575
},
{
"epoch": 5.67237163814181,
"grad_norm": 0.20781448483467102,
"learning_rate": 0.00018000748950717038,
"loss": 0.1322,
"step": 580
},
{
"epoch": 5.721271393643032,
"grad_norm": 0.20179703831672668,
"learning_rate": 0.000179491320938562,
"loss": 0.1378,
"step": 585
},
{
"epoch": 5.770171149144255,
"grad_norm": 0.22105282545089722,
"learning_rate": 0.00017896933386423998,
"loss": 0.136,
"step": 590
},
{
"epoch": 5.819070904645477,
"grad_norm": 0.4113224446773529,
"learning_rate": 0.00017844156649195759,
"loss": 0.1495,
"step": 595
},
{
"epoch": 5.8679706601467,
"grad_norm": 0.20451286435127258,
"learning_rate": 0.00017790805745256704,
"loss": 0.1318,
"step": 600
},
{
"epoch": 5.916870415647922,
"grad_norm": 0.18566569685935974,
"learning_rate": 0.0001773688457971919,
"loss": 0.1359,
"step": 605
},
{
"epoch": 5.965770171149144,
"grad_norm": 0.1862591803073883,
"learning_rate": 0.0001768239709943686,
"loss": 0.1358,
"step": 610
},
{
"epoch": 5.995110024449878,
"eval_loss": 0.23938237130641937,
"eval_runtime": 387.8478,
"eval_samples_per_second": 1.057,
"eval_steps_per_second": 1.057,
"step": 613
},
{
"epoch": 6.014669926650367,
"grad_norm": 0.16670842468738556,
"learning_rate": 0.0001762734729271575,
"loss": 0.1275,
"step": 615
},
{
"epoch": 6.06356968215159,
"grad_norm": 0.23901741206645966,
"learning_rate": 0.00017571739189022365,
"loss": 0.1113,
"step": 620
},
{
"epoch": 6.112469437652812,
"grad_norm": 0.19317218661308289,
"learning_rate": 0.00017515576858688722,
"loss": 0.1101,
"step": 625
},
{
"epoch": 6.161369193154035,
"grad_norm": 0.21369099617004395,
"learning_rate": 0.00017458864412614434,
"loss": 0.1122,
"step": 630
},
{
"epoch": 6.210268948655257,
"grad_norm": 0.21011659502983093,
"learning_rate": 0.00017401606001965782,
"loss": 0.1136,
"step": 635
},
{
"epoch": 6.25916870415648,
"grad_norm": 0.1860456019639969,
"learning_rate": 0.00017343805817871886,
"loss": 0.1305,
"step": 640
},
{
"epoch": 6.308068459657702,
"grad_norm": 0.23417602479457855,
"learning_rate": 0.00017285468091117904,
"loss": 0.1165,
"step": 645
},
{
"epoch": 6.356968215158924,
"grad_norm": 0.189472958445549,
"learning_rate": 0.00017226597091835378,
"loss": 0.119,
"step": 650
},
{
"epoch": 6.405867970660147,
"grad_norm": 0.2460348904132843,
"learning_rate": 0.00017167197129189652,
"loss": 0.1188,
"step": 655
},
{
"epoch": 6.45476772616137,
"grad_norm": 0.20059679448604584,
"learning_rate": 0.00017107272551064473,
"loss": 0.1194,
"step": 660
},
{
"epoch": 6.503667481662592,
"grad_norm": 0.19838838279247284,
"learning_rate": 0.00017046827743743726,
"loss": 0.1165,
"step": 665
},
{
"epoch": 6.552567237163814,
"grad_norm": 0.20280085504055023,
"learning_rate": 0.00016985867131590383,
"loss": 0.1168,
"step": 670
},
{
"epoch": 6.601466992665037,
"grad_norm": 0.27974265813827515,
"learning_rate": 0.00016924395176722647,
"loss": 0.122,
"step": 675
},
{
"epoch": 6.65036674816626,
"grad_norm": 0.1994495540857315,
"learning_rate": 0.0001686241637868734,
"loss": 0.1173,
"step": 680
},
{
"epoch": 6.699266503667482,
"grad_norm": 0.20043040812015533,
"learning_rate": 0.00016799935274130546,
"loss": 0.1183,
"step": 685
},
{
"epoch": 6.748166259168704,
"grad_norm": 0.19184747338294983,
"learning_rate": 0.00016736956436465573,
"loss": 0.1192,
"step": 690
},
{
"epoch": 6.797066014669927,
"grad_norm": 0.20747938752174377,
"learning_rate": 0.00016673484475538146,
"loss": 0.1188,
"step": 695
},
{
"epoch": 6.84596577017115,
"grad_norm": 0.19285354018211365,
"learning_rate": 0.00016609524037289019,
"loss": 0.117,
"step": 700
},
{
"epoch": 6.894865525672372,
"grad_norm": 0.18242338299751282,
"learning_rate": 0.00016545079803413892,
"loss": 0.1208,
"step": 705
},
{
"epoch": 6.943765281173594,
"grad_norm": 0.19887416064739227,
"learning_rate": 0.00016480156491020727,
"loss": 0.1227,
"step": 710
},
{
"epoch": 6.992665036674817,
"grad_norm": 0.19773922860622406,
"learning_rate": 0.00016414758852284478,
"loss": 0.1199,
"step": 715
},
{
"epoch": 6.992665036674817,
"eval_loss": 0.24741248786449432,
"eval_runtime": 387.335,
"eval_samples_per_second": 1.059,
"eval_steps_per_second": 1.059,
"step": 715
},
{
"epoch": 7.041564792176039,
"grad_norm": 0.5106807351112366,
"learning_rate": 0.0001634889167409923,
"loss": 0.1051,
"step": 720
},
{
"epoch": 7.090464547677262,
"grad_norm": 0.18619847297668457,
"learning_rate": 0.0001628255977772784,
"loss": 0.0979,
"step": 725
},
{
"epoch": 7.139364303178484,
"grad_norm": 0.18676620721817017,
"learning_rate": 0.00016215768018449012,
"loss": 0.1009,
"step": 730
},
{
"epoch": 7.188264058679707,
"grad_norm": 0.2054695338010788,
"learning_rate": 0.00016148521285201927,
"loss": 0.1002,
"step": 735
},
{
"epoch": 7.237163814180929,
"grad_norm": 0.20496530830860138,
"learning_rate": 0.00016080824500228367,
"loss": 0.1011,
"step": 740
},
{
"epoch": 7.286063569682152,
"grad_norm": 0.18679122626781464,
"learning_rate": 0.0001601268261871244,
"loss": 0.1052,
"step": 745
},
{
"epoch": 7.334963325183374,
"grad_norm": 0.20614224672317505,
"learning_rate": 0.00015944100628417868,
"loss": 0.1021,
"step": 750
},
{
"epoch": 7.383863080684597,
"grad_norm": 0.20026642084121704,
"learning_rate": 0.00015875083549322908,
"loss": 0.1019,
"step": 755
},
{
"epoch": 7.432762836185819,
"grad_norm": 0.1852520853281021,
"learning_rate": 0.00015805636433252891,
"loss": 0.1028,
"step": 760
},
{
"epoch": 7.481662591687042,
"grad_norm": 0.19096429646015167,
"learning_rate": 0.0001573576436351046,
"loss": 0.1031,
"step": 765
},
{
"epoch": 7.530562347188264,
"grad_norm": 0.18263529241085052,
"learning_rate": 0.00015665472454503483,
"loss": 0.1033,
"step": 770
},
{
"epoch": 7.579462102689487,
"grad_norm": 0.1884106546640396,
"learning_rate": 0.00015594765851370684,
"loss": 0.1063,
"step": 775
},
{
"epoch": 7.628361858190709,
"grad_norm": 0.2005338817834854,
"learning_rate": 0.0001552364972960506,
"loss": 0.1054,
"step": 780
},
{
"epoch": 7.677261613691932,
"grad_norm": 0.184016153216362,
"learning_rate": 0.0001545212929467503,
"loss": 0.1048,
"step": 785
},
{
"epoch": 7.726161369193154,
"grad_norm": 0.19765067100524902,
"learning_rate": 0.0001538020978164341,
"loss": 0.1044,
"step": 790
},
{
"epoch": 7.775061124694377,
"grad_norm": 0.18265607953071594,
"learning_rate": 0.0001530789645478426,
"loss": 0.1051,
"step": 795
},
{
"epoch": 7.823960880195599,
"grad_norm": 0.19815443456172943,
"learning_rate": 0.00015235194607197508,
"loss": 0.1081,
"step": 800
},
{
"epoch": 7.872860635696822,
"grad_norm": 0.22219662368297577,
"learning_rate": 0.0001516210956042153,
"loss": 0.1071,
"step": 805
},
{
"epoch": 7.921760391198044,
"grad_norm": 0.20078670978546143,
"learning_rate": 0.0001508864666404365,
"loss": 0.1075,
"step": 810
},
{
"epoch": 7.970660146699267,
"grad_norm": 0.17794115841388702,
"learning_rate": 0.00015014811295308543,
"loss": 0.1051,
"step": 815
},
{
"epoch": 8.0,
"eval_loss": 0.2625426948070526,
"eval_runtime": 387.4946,
"eval_samples_per_second": 1.058,
"eval_steps_per_second": 1.058,
"step": 818
},
{
"epoch": 8.01955990220049,
"grad_norm": 0.1608039289712906,
"learning_rate": 0.0001494060885872464,
"loss": 0.0994,
"step": 820
},
{
"epoch": 8.06845965770171,
"grad_norm": 0.2323434203863144,
"learning_rate": 0.00014866044785668563,
"loss": 0.0895,
"step": 825
},
{
"epoch": 8.117359413202934,
"grad_norm": 0.17606528103351593,
"learning_rate": 0.0001479112453398753,
"loss": 0.0849,
"step": 830
},
{
"epoch": 8.166259168704157,
"grad_norm": 0.19025173783302307,
"learning_rate": 0.0001471585358759987,
"loss": 0.0886,
"step": 835
},
{
"epoch": 8.21515892420538,
"grad_norm": 0.1990627497434616,
"learning_rate": 0.00014640237456093634,
"loss": 0.0905,
"step": 840
},
{
"epoch": 8.2640586797066,
"grad_norm": 0.1725684553384781,
"learning_rate": 0.00014564281674323297,
"loss": 0.0899,
"step": 845
},
{
"epoch": 8.312958435207824,
"grad_norm": 0.18845060467720032,
"learning_rate": 0.00014487991802004623,
"loss": 0.0886,
"step": 850
},
{
"epoch": 8.361858190709047,
"grad_norm": 0.23856212198734283,
"learning_rate": 0.00014411373423307714,
"loss": 0.0924,
"step": 855
},
{
"epoch": 8.41075794621027,
"grad_norm": 0.18084120750427246,
"learning_rate": 0.00014334432146448272,
"loss": 0.0918,
"step": 860
},
{
"epoch": 8.45965770171149,
"grad_norm": 0.18600909411907196,
"learning_rate": 0.00014257173603277095,
"loss": 0.0913,
"step": 865
},
{
"epoch": 8.508557457212714,
"grad_norm": 0.1851680874824524,
"learning_rate": 0.00014179603448867835,
"loss": 0.0912,
"step": 870
},
{
"epoch": 8.557457212713937,
"grad_norm": 0.1818709820508957,
"learning_rate": 0.00014101727361103076,
"loss": 0.0903,
"step": 875
},
{
"epoch": 8.60635696821516,
"grad_norm": 0.19458520412445068,
"learning_rate": 0.00014023551040258725,
"loss": 0.0916,
"step": 880
},
{
"epoch": 8.65525672371638,
"grad_norm": 0.17777447402477264,
"learning_rate": 0.00013945080208586775,
"loss": 0.0928,
"step": 885
},
{
"epoch": 8.704156479217604,
"grad_norm": 0.20647075772285461,
"learning_rate": 0.00013866320609896447,
"loss": 0.0926,
"step": 890
},
{
"epoch": 8.753056234718827,
"grad_norm": 0.18589670956134796,
"learning_rate": 0.00013787278009133776,
"loss": 0.0934,
"step": 895
},
{
"epoch": 8.80195599022005,
"grad_norm": 0.19582615792751312,
"learning_rate": 0.00013707958191959608,
"loss": 0.0954,
"step": 900
},
{
"epoch": 8.85085574572127,
"grad_norm": 0.19688870012760162,
"learning_rate": 0.00013628366964326153,
"loss": 0.0925,
"step": 905
},
{
"epoch": 8.899755501222494,
"grad_norm": 0.1874823123216629,
"learning_rate": 0.00013548510152051963,
"loss": 0.0939,
"step": 910
},
{
"epoch": 8.948655256723717,
"grad_norm": 0.1876133382320404,
"learning_rate": 0.00013468393600395525,
"loss": 0.097,
"step": 915
},
{
"epoch": 8.99755501222494,
"grad_norm": 0.1735718548297882,
"learning_rate": 0.00013388023173627414,
"loss": 0.0945,
"step": 920
},
{
"epoch": 8.99755501222494,
"eval_loss": 0.27974453568458557,
"eval_runtime": 387.9073,
"eval_samples_per_second": 1.057,
"eval_steps_per_second": 1.057,
"step": 920
},
{
"epoch": 9.04645476772616,
"grad_norm": 0.1655295491218567,
"learning_rate": 0.00013307404754601013,
"loss": 0.0806,
"step": 925
},
{
"epoch": 9.095354523227384,
"grad_norm": 0.19395217299461365,
"learning_rate": 0.0001322654424432195,
"loss": 0.0788,
"step": 930
},
{
"epoch": 9.144254278728607,
"grad_norm": 0.18941174447536469,
"learning_rate": 0.00013145447561516138,
"loss": 0.0793,
"step": 935
},
{
"epoch": 9.19315403422983,
"grad_norm": 0.20010443031787872,
"learning_rate": 0.00013064120642196548,
"loss": 0.0807,
"step": 940
},
{
"epoch": 9.24205378973105,
"grad_norm": 0.20777645707130432,
"learning_rate": 0.00012982569439228713,
"loss": 0.08,
"step": 945
},
{
"epoch": 9.290953545232274,
"grad_norm": 0.173665389418602,
"learning_rate": 0.00012900799921895003,
"loss": 0.0808,
"step": 950
},
{
"epoch": 9.339853300733497,
"grad_norm": 0.20865468680858612,
"learning_rate": 0.0001281881807545769,
"loss": 0.0808,
"step": 955
},
{
"epoch": 9.38875305623472,
"grad_norm": 0.18372130393981934,
"learning_rate": 0.0001273662990072083,
"loss": 0.0804,
"step": 960
},
{
"epoch": 9.43765281173594,
"grad_norm": 0.1785283237695694,
"learning_rate": 0.00012654241413591054,
"loss": 0.0812,
"step": 965
},
{
"epoch": 9.486552567237164,
"grad_norm": 0.17695043981075287,
"learning_rate": 0.000125716586446372,
"loss": 0.0827,
"step": 970
},
{
"epoch": 9.535452322738386,
"grad_norm": 0.18287776410579681,
"learning_rate": 0.00012488887638648907,
"loss": 0.083,
"step": 975
},
{
"epoch": 9.58435207823961,
"grad_norm": 0.20748884975910187,
"learning_rate": 0.00012405934454194146,
"loss": 0.0816,
"step": 980
},
{
"epoch": 9.63325183374083,
"grad_norm": 0.18160052597522736,
"learning_rate": 0.00012322805163175762,
"loss": 0.0823,
"step": 985
},
{
"epoch": 9.682151589242054,
"grad_norm": 0.17889925837516785,
"learning_rate": 0.0001223950585038703,
"loss": 0.0822,
"step": 990
},
{
"epoch": 9.731051344743276,
"grad_norm": 0.1896965056657791,
"learning_rate": 0.00012156042613066258,
"loss": 0.0839,
"step": 995
},
{
"epoch": 9.7799511002445,
"grad_norm": 0.19203361868858337,
"learning_rate": 0.00012072421560450497,
"loss": 0.0828,
"step": 1000
},
{
"epoch": 9.82885085574572,
"grad_norm": 0.18262554705142975,
"learning_rate": 0.00011988648813328367,
"loss": 0.0838,
"step": 1005
},
{
"epoch": 9.877750611246944,
"grad_norm": 0.18471267819404602,
"learning_rate": 0.0001190473050359203,
"loss": 0.084,
"step": 1010
},
{
"epoch": 9.926650366748166,
"grad_norm": 0.18675756454467773,
"learning_rate": 0.00011820672773788353,
"loss": 0.0835,
"step": 1015
},
{
"epoch": 9.97555012224939,
"grad_norm": 0.17983846366405487,
"learning_rate": 0.00011736481776669306,
"loss": 0.0843,
"step": 1020
},
{
"epoch": 9.995110024449877,
"eval_loss": 0.2892283499240875,
"eval_runtime": 389.1699,
"eval_samples_per_second": 1.054,
"eval_steps_per_second": 1.054,
"step": 1022
},
{
"epoch": 9.995110024449877,
"step": 1022,
"total_flos": 7.585797735459062e+17,
"train_loss": 0.2326957560244605,
"train_runtime": 28787.7929,
"train_samples_per_second": 0.568,
"train_steps_per_second": 0.071
}
],
"logging_steps": 5,
"max_steps": 2040,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.585797735459062e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}